diff --git a/CMake/bin2hex.cmake b/CMake/bin2hex.cmake new file mode 100644 index 00000000..603c9a6c --- /dev/null +++ b/CMake/bin2hex.cmake @@ -0,0 +1,26 @@ +# A small utility function which generates a C-header from an input file +function(FILE_TO_C_STRING FILENAME VARIABLE_NAME OUTPUT_STRING ZERO_TERMINATED) + FILE(READ "${FILENAME}" HEX_INPUT HEX) + if (${ZERO_TERMINATED}) + string(APPEND HEX_INPUT "00") + endif() + + string(REGEX REPLACE "(....)" "\\1\n" HEX_OUTPUT ${HEX_INPUT}) + string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," HEX_OUTPUT ${HEX_OUTPUT}) + + set(HEX_OUTPUT "static char const ${VARIABLE_NAME}[] = {\n ${HEX_OUTPUT}\n};\n") + + set(${OUTPUT_STRING} "${HEX_OUTPUT}" PARENT_SCOPE) +endfunction() + +message("Create header file for ${FILE_IN}") +message("Create header file for ${FILE_OUT}") +file_to_c_string(${FILE_IN} ${VARIABLE_NAME} OUTPUT_STRING ZERO_TERMINATED) + +set(RESULT "#pragma once\n") +string(APPEND RESULT "namespace cutlass {\n") +string(APPEND RESULT "namespace nvrtc {\n") +string(APPEND RESULT "${OUTPUT_STRING}") +string(APPEND RESULT "} // namespace nvrtc\n") +string(APPEND RESULT "} // namespace cutlass\n") +file(WRITE "${FILE_OUT}" "${RESULT}") diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..5a53fae5 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,182 @@ +# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this list of +# conditions and the following disclaimer in the documentation and/or other materials +# provided with the distribution. 
+# * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +cmake_minimum_required(VERSION 3.3.0) + +set(CUTLASS_LANGUAGES CXX) + +# CMake 3.9.0 has native support for CUDA without the need of the CUDA package. Use it! +if(WIN32 AND NOT ${CMAKE_VERSION} VERSION_LESS "3.9.0") + list(APPEND CUTLASS_LANGUAGES CUDA) + set(CUTLASS_NATIVE_CUDA TRUE) + + macro(cutlass_add_executable) + add_executable(${ARGN}) + endmacro() +else() + # FindCUDA fails to detect VS 2017 due to a changed directory format of the toolkits. + # For this configuration we need CMake >= 3.9.0 to use the native CUDA support. 
+ if (WIN32 AND MSVC_VERSION GREATER 1800) + message(FATAL_ERROR "Please upgrade CMake to version >= 3.9.0 to support Visual Studio 2017 or higher") + endif() + + # Fall back to the FindCUDA version to create an executable with CUDA files + macro(cutlass_add_executable) + cuda_add_executable(${ARGN}) + endmacro() +endif() + +project(CUTLASS ${CUTLASS_LANGUAGES}) + +# check if the configuration is supported +if( NOT CMAKE_SIZEOF_VOID_P EQUAL 8 ) + message(FATAL_ERROR "CUTLASS requires a 64-bit compiler!") +endif() + +find_package(CUDA) +find_package(Doxygen QUIET) + +# By default we want to build in Release mode to ensure that we're getting best performance +if (NOT (CMAKE_BUILD_TYPE OR CONFIGURATION_TYPES)) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose build level" FORCE) + # We do support Debug or Release builds + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release") +endif() + +if(WIN32) + # On Windows we link against the shared (DLL) runtime. Change gtest settings to match this. 
+ set(gtest_force_shared_crt ON CACHE BOOL "Use shared (DLL) run-time lib even when Google Test is built as static lib" FORCE) +endif() + +if (WIN32) + # Enable more warnings and treat as errors + string(APPEND NVCC_FLAGS " -Xcompiler /W3 -Xcompiler /WX") + + # Disable excess x86 floating point precision that can lead to results being labeled incorrectly + string(APPEND NVCC_FLAGS " -Xcompiler /fp:strict") + + # Verbose option + if (${CUTLASS_NVCC_VERBOSE}) + string(APPEND NVCC_FLAGS " -v") + endif() +endif(WIN32) + +# Configure CUDA options +set(CUTLASS_NVCC_ARCHS "50;60;61;70" CACHE STRING "The SM architectures to build code for.") +set(CUTLASS_NVCC_KEEP OFF CACHE BOOL "Keep intermediate files generated by NVCC.") + +foreach(ARCH ${CUTLASS_NVCC_ARCHS}) + string(APPEND NVCC_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}") +endforeach() + + +if (CUTLASS_NVCC_KEEP) + string(APPEND NVCC_FLAGS " -keep") +endif() + +if (WIN32 AND CUTLASS_NATIVE_CUDA) + string(APPEND NVCC_FLAGS_RELEASE " -lineinfo") +else() + string(APPEND NVCC_FLAGS " -lineinfo") +endif() + +if (UNIX) + string(APPEND NVCC_FLAGS " -Xcompiler -Wconversion") +endif() + +string(APPEND NVCC_FLAGS_DEBUG " -g") +string(APPEND NVCC_FLAGS_RELEASE " -O3") + +# define NDEBUG for release mode to disable assertions +string(APPEND NVCC_FLAGS_RELEASE " -DNDEBUG") + +if (CUTLASS_NATIVE_CUDA) + set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS}") + set(CMAKE_CUDA_FLAGS_DEBUG "${NVCC_FLAGS_DEBUG}") + set(CMAKE_CUDA_FLAGS_RELEASE "${NVCC_FLAGS_RELEASE}") +else() + set(CUDA_NVCC_FLAGS ${NVCC_FLAGS}) + set(CUDA_NVCC_FLAGS_DEBUG ${NVCC_FLAGS_DEBUG}) + set(CUDA_NVCC_FLAGS_RELEASE ${NVCC_FLAGS_RELEASE}) +endif() + +# +# The following items should eventually be pushed into cutlass/CMakeLists.txt +# + +# GLOB for CUTLASS header files. Should we use a static list instead? 
+file(GLOB CUTLASS_GEMM RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/gemm/*.h) +file(GLOB CUTLASS_UTIL RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/util/*.h) +file(GLOB CUTLASS_DEVICE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/device/*.h) +file(GLOB CUTLASS_CORE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/*.h) + +source_group("cutlass\\gemm" FILES ${CUTLASS_GEMM}) +source_group("cutlass\\util" FILES ${CUTLASS_UTIL}) +source_group("cutlass\\device" FILES ${CUTLASS_DEVICE}) +source_group("cutlass" FILES ${CUTLASS_CORE}) + +add_library(CUTLASS INTERFACE) +include_directories("${CMAKE_CURRENT_SOURCE_DIR}") +target_sources(CUTLASS INTERFACE + ${CUTLASS_GEMM} + ${CUTLASS_UTIL} + ${CUTLASS_DEVICE} + ${CUTLASS_CORE} +) + +target_include_directories(CUTLASS INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) + +# Create a custom target to ensure that the CUTLASS sources are visible in an IDE +add_custom_target(cutlass_ide SOURCES + ${CUTLASS_GEMM} + ${CUTLASS_UTIL} + ${CUTLASS_DEVICE} + ${CUTLASS_CORE} +) +# Doxygen is available. Generate documentation +if (DOXYGEN_FOUND) + # DOT is available. Enable graph generation in the documentation + if (DOXYGEN_DOT_EXECUTABLE) + set(CUTLASS_ENABLE_DOXYGEN_DOT ON CACHE BOOL "Use dot to generate graphs in the doxygen documentation.") + else() + set(CUTLASS_ENABLE_DOXYGEN_DOT OFF CACHE BOOL "Use dot to generate graphs in the doxygen documentation." FORCE) + endif() + + if (CUTLASS_ENABLE_DOXYGEN_DOT) + set(HAVE_DOT "YES") + else() + set(HAVE_DOT "NO") + endif() + + # Add custom target for Doxygen. 
+ add_custom_target(cutlass_docs ${CMAKE_COMMAND} -E env + "DOT_PATH=${DOXYGEN_DOT_EXECUTABLE}" + "HAVE_DOT=${HAVE_DOT}" + ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + VERBATIM + ) +endif() + + +#add_subdirectory(examples/gemm) +add_subdirectory(tools) diff --git a/Doxyfile b/Doxyfile index 15650b20..51cec529 100644 --- a/Doxyfile +++ b/Doxyfile @@ -58,7 +58,7 @@ PROJECT_LOGO = # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. -OUTPUT_DIRECTORY = doxygen +OUTPUT_DIRECTORY = docs # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and @@ -218,7 +218,8 @@ TAB_SIZE = 4 # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. -ALIASES = +#ALIASES += "concept{1}=@ingroup \1\n@par Implemented concepts:\n@ref \1" +ALIASES += "concept{1}=@ingroup \1" # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" @@ -396,7 +397,7 @@ LOOKUP_CACHE_SIZE = 0 # normally produced when WARNINGS is set to YES. # The default value is: NO. -EXTRACT_ALL = NO +EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class will # be included in the documentation. @@ -733,7 +734,7 @@ WARN_LOGFILE = # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = cutlass cutlass/gemm cutlass/util +INPUT = cutlass # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -759,7 +760,7 @@ FILE_PATTERNS = # be searched for input files as well. # The default value is: NO. 
-RECURSIVE = NO +RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a @@ -2032,7 +2033,7 @@ HIDE_UNDOC_RELATIONS = YES # set to NO # The default value is: NO. -HAVE_DOT = NO +HAVE_DOT = $(HAVE_DOT) # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed # to run in parallel. When set to 0 doxygen will base this on the number of @@ -2204,7 +2205,7 @@ INTERACTIVE_SVG = NO # found. If left blank, it is assumed the dot tool can be found in the path. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_PATH = +DOT_PATH = $(DOT_PATH) # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the \dotfile diff --git a/README.md b/README.md index a13c9a4c..05a0d3a3 100644 --- a/README.md +++ b/README.md @@ -1,106 +1,213 @@ -![ALT](/media/fig-09-complete-hierarchy.png "Complete CUDA GEMM decomposition") +![ALT](/media/images/gemm-hierarchy-with-epilogue-no-labels.png "Complete CUDA GEMM decomposition") -# Introduction +# CUTLASS 1.0 -CUTLASS is a collection of CUDA C++ template abstractions for implementing -high-performance matrix-multiplication (GEMM) at all levels and scales within CUDA. -It incorporates strategies for hierarchical decomposition and data movement similar -to those used to implement cuBLAS. CUTLASS decomposes these "moving parts" into +CUTLASS 1.0 is a collection of CUDA C++ template abstractions for implementing +high-performance matrix-multiplication (GEMM) at all levels and scales within CUDA. +It incorporates strategies for hierarchical decomposition and data movement similar +to those used to implement cuBLAS. CUTLASS decomposes these "moving parts" into reusable, modular software components abstracted by C++ template classes. 
These -thread-wide, warp-wide, block-wide, and device-wide primitives can be specialized -and tuned via custom tiling sizes, data types, and other algorithmic policy. The -resulting flexibility simplifies their use as building blocks within custom kernels +thread-wide, warp-wide, block-wide, and device-wide primitives can be specialized +and tuned via custom tiling sizes, data types, and other algorithmic policy. The +resulting flexibility simplifies their use as building blocks within custom kernels and applications. To support a wide variety of applications, CUTLASS provides extensive support for -mixed-precision computations, providing specialized data-movement and -multiply-accumulate abstractions for 8-bit integer, half-precision floating -point (FP16), single-precision floating point (FP32), and double-precision floating +mixed-precision computations, providing specialized data-movement and +multiply-accumulate abstractions for 8-bit integer, half-precision floating +point (FP16), single-precision floating point (FP32), and double-precision floating point (FP64) types. Furthermore, CUTLASS demonstrates CUDA's WMMA API for targeting -the programmable, high-throughput _Tensor Cores_ provided by NVIDIA's Volta architecture +the programmable, high-throughput _Tensor Cores_ provided by NVIDIA's Volta architecture and beyond. -For more exposition, see our Parallel Forall blog post [CUTLASS: Fast Linear Algebra -in CUDA C++](https://devblogs.nvidia.com/parallelforall/cutlass-linear-algebra-cuda). +CUTLASS 1.0 has changed substantially from our preview release described in +the [CUTLASS Parallel For All](https://devblogs.nvidia.com/parallelforall/cutlass-linear-algebra-cuda) +post. We have decomposed the structure of the GEMM computation into deeper, structured +primitives for loading data, computing predicate masks, streaming data at each level of +the GEMM hierarchy, and updating the output matrix. 
+ +CUTLASS 1.0 is described in the [Doxygen documentation](https://github.com/NVIDIA/cutlass/docs) +and our talk at the GPU Technology Conference 2018 (login required). # Performance -

+

+ +CUTLASS primitives are very efficient. When used to construct device-wide GEMM kernels, +they exhibit performance comparable to cuBLAS for scalar GEMM +computations. The above figure shows CUTLASS performance relative to cuBLAS +for large matrix dimensions (M=10240, N=K=4096) running on an NVIDIA Titan V GPU +when compiled with CUDA 9.2. + +# Compatibility + +CUTLASS requires CUDA 9 and performs best with [CUDA 9.2 Toolkit](https://developer.nvidia.com/cuda-toolkit) or later. + +|**Operating System** | **Compiler** | +|-----------------|----------| +| Windows 10 | Microsoft Visual Studio 2015| +| | Microsoft Visual Studio 2017| +| Ubuntu 14.04 | GCC 4.8.2 | +| Ubuntu 16.04 | GCC 5.4.0 | + + +CUTLASS runs successfully on the following NVIDIA GPUs, and it is expected to be efficient on +any Maxwell-, Pascal-, or Volta-architecture NVIDIA GPU. + +|**GPU**| +|---| +|NVIDIA GeForce 1080| +|NVIDIA TitanXP| +|NVIDIA Tesla P100| +|NVIDIA Tesla V100| +|NVIDIA TitanV| + + +# Building CUTLASS + +CUTLASS is a header-only template library and does not need to be built to be used by other +projects. However, we distribute extensive unit tests and utility programs to demonstrate +CUTLASS. These instructions are for building those test programs. + +CUTLASS's unit tests depend on Google Test which exists as a git submodule. You can fetch +submodules as follows. + +``` +$ git submodule update --init --recursive +``` + +CUTLASS can be built with CMake starting with version 3.10. By default CUTLASS will build kernels +for CUDA architecture versions 5.0, 6.0, 6.1 and 7.0. To reduce compile time you can specify +the architectures to build CUTLASS for by changing the CMake configuration setting +`CUTLASS_NVCC_ARCHS`. + +Create a build directory within the CUTLASS project, then run CMake once. + +``` +$ mkdir build && cd build +$ cmake .. +``` + +Compile the CUTLASS project by running Make. Include the -j argument to compile sources in +parallel and speed up the build process. 
+ +``` +$ make -j12 +... +$ +``` + +Verify CUTLASS has been built correctly by running the unit tests from the build/ directory. + +``` +$ ./tools/test/unit/cutlass_unit_test +... +... +... +[----------] Global test environment tear-down +[==========] 481 tests from 24 test cases ran. (5954 ms total) +[ PASSED ] 481 tests. +``` + +All tests should pass, though the exact number of tests may vary over time. -CUTLASS primitives are very efficient. When used to construct device-wide GEMM kernels, -they exhibit performance comparable to cuBLAS for scalar GEMM -computations. The above figure shows CUTLASS performance relative to cuBLAS -for large matrix dimensions (M=10240, N=K=4096) running on an NVIDIA Tesla V100 GPU -when compiled with CUDA 9.0. # Project Structure CUTLASS is arranged as a header-only library with several example test programs -that demonstrate instantiating a GEMM task within a CUDA kernel. Comments inline -with the source explain the individual components. +that demonstrate instantiating a GEMM task within a CUDA kernel. The Doxygen documentation +provides a complete list of files, classes, and template concepts defined in the CUTLASS +project. A brief summary is described below. -The repository is organized in the following arrangement. - - cutlass/ Root of header-only source library for matrix multiply - gemm/ Implementation of GEMM __device__ code and supporting components - util/ Utility components for CUDA device-side CUDA development - -A test program is provided to illustrate the use of CUTLASS. This is implemented -in the following directory. - - cutlass_test Root of test programs depicting CUTLASS kernels - util/ Utilities - gemm.cu Simple example calling CUTLASS and CUBLAS GEMM kernels - Makefile Build script for test programs - - -# Makefile usage - -There are different sample targets for different GEMM data types and -transposititions. Be sure to specify your target architecture. 
- - make sm=<60|61|70> \ - [transpose=] [verbose=<0|1>] [keep=<0|1>] - - -# Program usage - - Program usage: - - gemm_ - [--help] - [--schmoo=<#schmoo-samples> || --m= --n= --k=] - [--i=] - [--device=] - [--alpha= --beta=] - - -# Open Source License - -CUTLASS is released by NVIDIA Corporation under the "New BSD" open-source license: +The CUTLASS library is defined in the cutlass/ directory and consists of CUDA C++ template +classes and other definitions for implementing efficient GPU GEMM kernels. A set of core +classes and templates define basic primitives that are then applied to compute GEMM via +templates in the cutlass/gemm directory. ``` -Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the NVIDIA CORPORATION nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +cutlass/ + gemm/ + util/ + ``` + +Several tools and test programs are also distributed with the CUTLASS library. They are +contained in the following directories. + +``` +tools/ + test/ + unit/ + core/ + gemm/ + perf/ + util/ + +``` + +The `test/unit/` directory consists of unit tests implemented with Google Test that demonstrate +basic usage of Core API components and complete tests of the CUTLASS GEMM computations. + +# Performance Profiling + +The `test/perf/` directory contains a command-line utility for launching each of the GEMM kernels. +Its usage is shown below. + +Program usage: + +``` + cutlass_perf_test [options] + + --help + --append= If true, appends output to existing CSV file. If false, overwrites. + --alpha= Value for alpha to be used in GEMM experiments + --beta= Value for beta to be used in GEMM experiments + --output= Writes summary of profiling to specified .csv file + --iterations= maximum number of iterations to execute when profiling + --m=[:max height[:step]] Height of GEMM problem (number of rows of C). May specify a range with optional step size. + --n=[:max width[:step]] Width of GEMM problem (number of columns of C). May specify a range with optional step size. + --k=[:max depth[:step]] Size of inner dimension of A and B. May specify a range with optional step size. 
+ --kernels=<{s|d|h|i|wmma}gemm_{nn,nt,tn,tt}> Select GEMM datatype and layout to use for tests + --peak= If true, only reports peak performance per kernel after profiling specified problem space. + --seed= Random seed used by the random number generator in initializing input matrices. + --tags= Inserts leading columns in output table and uniform values for each column. Useful for generating pivot tables. + + + Example usage: + + # Runs one problem size for all kernels + $ ./tools/test/perf/cutlass_perf_test --m=10240 --n=1024 --k=1024 + + # Varies GEMM K dimension for SGEMM and IGEMM with column-major multiplicands + $ ./tools/test/perf/cutlass_perf_test --m=10240 --n=4096 --k=1024:8192:128 --kernels=sgemm_nn,igemm_nn +``` + +# About + +CUTLASS is released by NVIDIA Corporation as Open Source software under the +3-clause "New" BSD license. + + +# Copyright + +Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + * Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/clang-format.sh b/clang-format.sh new file mode 100755 index 00000000..b2570d91 --- /dev/null +++ b/clang-format.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -e + +function formatFiles { + for f in `find "$1" -type f -name "*.$2"` ; do + COMMAND="clang-format -i $f" + echo $COMMAND + $COMMAND + done +} + +formatFiles "cutlass" "h" +formatFiles "tools/test" "h" +formatFiles "tools/test" "cpp" +formatFiles "tools/util" "h" + diff --git a/common.mk b/common.mk deleted file mode 100644 index 672ea5b2..00000000 --- a/common.mk +++ /dev/null @@ -1,181 +0,0 @@ -#/****************************************************************************** -# * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. -# * -# * Redistribution and use in source and binary forms, with or without -# * modification, are permitted provided that the following conditions are met: -# * * Redistributions of source code must retain the above copyright -# * notice, this list of conditions and the following disclaimer. -# * * Redistributions in binary form must reproduce the above copyright -# * notice, this list of conditions and the following disclaimer in the -# * documentation and/or other materials provided with the distribution. -# * * Neither the name of the NVIDIA CORPORATION nor the -# * names of its contributors may be used to endorse or promote products -# * derived from this software without specific prior written permission. 
-# * -# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY -# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# * -#******************************************************************************/ - - -#------------------------------------------------------------------------------- -# Commandline Options -#------------------------------------------------------------------------------- - -# sm= Compute-capability to compile for, e.g., "sm=200,300,350" (SM2.0 by default). 
- -COMMA := , -ifdef sm - SM_ARCH := $(subst $(COMMA),-,$(sm)) -else - $(error Please specify SM architecture makefile argument: "sm=XX") -endif - -ifeq (70, $(findstring 70, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_70,code=\"sm_70,compute_70\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_70 -endif -ifeq (62, $(findstring 62, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_62,code=\"sm_62,compute_62\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_62 -endif -ifeq (61, $(findstring 61, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_61,code=\"sm_61,compute_61\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_61 -endif -ifeq (60, $(findstring 60, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_60,code=\"sm_60,compute_60\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_60 -endif -ifeq (52, $(findstring 52, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_52,code=\"sm_52,compute_52\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_52 -endif -ifeq (37, $(findstring 37, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_37,code=\"sm_37,compute_37\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_37 -endif -ifeq (35, $(findstring 35, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_35,code=\"sm_35,compute_35\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_35 -endif -ifeq (30, $(findstring 30, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_30,code=\"sm_30,compute_30\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_30 -endif -ifeq (21, $(findstring 21, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_20,code=\"sm_21,compute_20\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_21 -endif -ifeq (20, $(findstring 20, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_20,code=\"sm_20,compute_20\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_20 -endif - - -# [verbose=<0|1>] Verbose toolchain output from nvcc option -ifeq ($(verbose), 1) - NVCCFLAGS += -v - CLANG_CFLAGS += -v -endif - - -# [keep=<0|1>] Keep intermediate compilation artifacts option -ifeq ($(keep), 1) - NVCCFLAGS += -keep - CLANG_CFLAGS += 
--save-temps -endif - - -# [debug=<0|1>] Generate debug mode code -ifeq ($(debug), 1) - NVCCFLAGS += -G - CLANG_CFLAGS += --cuda-noopt-device-debug -endif - - -#------------------------------------------------------------------------------- -# Compiler and compilation platform -#------------------------------------------------------------------------------- - -BASE_DIR := $(dir $(lastword $(MAKEFILE_LIST))) - -NVCC := "$(shell which nvcc)" -ifdef nvccver - NVCC_VERSION := $(nvccver) -else - NVCC_VERSION := $(strip $(shell nvcc --version | grep release | sed 's/.*release //' | sed 's/,.*//')) -endif - -# Detect OS -OSUPPER := $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:]) - -# Default flags: verbose kernel properties (regs, smem, cmem, etc.); runtimes for compilation phases -NVCCFLAGS += -O3 -Xptxas -v -CLANG_CFLAGS += -O3 -Xcuda-ptxas -v -ifeq (WIN_NT, $(findstring WIN_NT, $(OSUPPER))) - # For MSVC - - # Enable more warnings and treat as errors - NVCCFLAGS += -Xcompiler /W3 -Xcompiler /WX - - # Disable excess x86 floating point precision that can lead to results being labeled incorrectly - NVCCFLAGS += -Xcompiler /fp:strict - - # Compiler - CC := cl - - # Multithreaded runtime - NVCCFLAGS += -Xcompiler /MT - - CUDART_CYG := "$(shell dirname $(NVCC))/../lib/x64/cudart.lib" - CUDART := "$(shell cygpath -w $(CUDART_CYG))" - -else - # For g++ - - # Disable excess x86 floating point precision that can lead to results being labeled incorrectly - #NVCCFLAGS += -Xcompiler -ffloat-store - - # Compiler - CC := g++ - - CUDART := "$(shell dirname $(NVCC))/../lib64/libcudart_static.a" - -endif - -# compiler=clang Enables compilation with clang. - -ifeq ($(compiler), clang) - # NVCC_VERSION is used as the proxy for the CUDA version. - BIN_SUFFIX := sm$(SM_ARCH)_clang_cuda_$(NVCC_VERSION) - # Clangs needs few extra flags to point it to CUDA SDK - # and link the binaries with CUDA runtime. 
- CUDA_BASE=$(realpath $(join $(dir $(shell which nvcc)), ..)) - CLANG_CFLAGS += --cuda-path=$(CUDA_BASE) - LIBINC += -L$(CUDA_BASE)/lib64 -Wl,-rpath=$(CUDA_BASE)/lib64 - LIBS += -lcudart - - # Replace NVCC and its options with clang++. - NVCC = clang++ - NVCCFLAGS = $(CLANG_CFLAGS) - SM_TARGETS = $(CLANG_SM_TARGETS) -else - # Suffix to append to each binary - BIN_SUFFIX := sm$(SM_ARCH)_nvcc_$(NVCC_VERSION) -endif - - -#------------------------------------------------------------------------------- -# Function for computing dependency Lists -#------------------------------------------------------------------------------- - -rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) diff --git a/cutlass/convert.h b/cutlass/convert.h new file mode 100644 index 00000000..933d68a8 --- /dev/null +++ b/cutlass/convert.h @@ -0,0 +1,102 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! + \file + \brief Defines conversion operations among Fragments of different base type. +*/ +#pragma once + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Convert {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Convert, Fragment > { + /// The input fragment. + typedef Fragment InputFragment; + /// The output fragment. + typedef Fragment OutputFragment; + + /// Ctor. + CUTLASS_DEVICE Convert() {} + + /// Transform a fragment. + CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) { + transform(src, 0, dst); + } + + /// Transform a fragment. + template + CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) { + for (int i = 0; i < kScalars_; ++i) { + dst[i] = static_cast(src[i + offset]); + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Copy { + /// The input fragment. 
+ typedef Fragment_ InputFragment; + /// The output fragment. + typedef Fragment_ OutputFragment; + + /// Ctor. + CUTLASS_DEVICE Copy() {} + + /// Transform a fragment. + CUTLASS_DEVICE void transform(Fragment_ const& src, Fragment_& dst) { transform(src, 0, dst); } + + /// Transform a fragment. + template + CUTLASS_DEVICE void transform(InputFragment_ const& src, int offset, Fragment_& dst) { + if (sizeof(typename Fragment_::Element) == 8) { + uint64_t const* src_ptr = reinterpret_cast(&src[offset]); + uint64_t* dst_ptr = reinterpret_cast(&dst[0]); + for (int i = 0; i < sizeof(Fragment_) / 8; ++i) { + dst_ptr[i] = src_ptr[i]; + } + } else { + uint32_t const* src_ptr = reinterpret_cast(&src[offset]); + uint32_t* dst_ptr = reinterpret_cast(&dst[0]); + for (int i = 0; i < sizeof(Fragment_) / 4; ++i) { + dst_ptr[i] = src_ptr[i]; + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/coord.h b/cutlass/coord.h new file mode 100644 index 00000000..431c9bf1 --- /dev/null +++ b/cutlass/coord.h @@ -0,0 +1,287 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief A Coord is a coordinate of arbitrary rank into a tensor or matrix +*/ + +#pragma once + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Describes identity elements +struct Identity { + /// Enumeration describing identity elements. Value assignments are significant. + /// Feel free to add or multiply by these, respectively. 
+ enum Kind { Additive = 0, Multiplicative = 1 }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Statically-sized array specifying Coords within a tensor +template +struct Coord { + // + // Type and constant definitions + // + + static int const N = N_; + + // + // Data members + // + + /// Indices + int idx[N]; + + // + // Methods + // + + /// Default ctor initializes uniformly + CUTLASS_HOST_DEVICE + Coord(int value = 0) { + for (int i = 0; i < N; ++i) { + idx[i] = value; + } + } + + /// Constructs from an array of integers + CUTLASS_HOST_DEVICE + Coord(int _idx[]) { + for (int i = 0; i < N; ++i) { + idx[i] = _idx[i]; + } + } + + /// Element-wise addition + CUTLASS_HOST_DEVICE + Coord operator+(Coord const& b) const { + Coord c; + for (int i = 0; i < N; ++i) { + c.idx[i] = idx[i] + b.idx[i]; + } + return c; + } + + /// Element-wise subtraction + CUTLASS_HOST_DEVICE + Coord operator-(Coord const& b) const { + Coord c; + for (int i = 0; i < N; ++i) { + c.idx[i] = idx[i] - b.idx[i]; + } + return c; + } + + /// Element-wise multiplication + CUTLASS_HOST_DEVICE + Coord operator*(Coord const& b) const { + Coord c; + for (int i = 0; i < N; ++i) { + c.idx[i] = idx[i] * b.idx[i]; + } + return c; + } + + /// Element-wise division + CUTLASS_HOST_DEVICE + Coord operator/(Coord const& b) const { + Coord c; + for (int i = 0; i < N; ++i) { + c.idx[i] = idx[i] / b.idx[i]; + } + return c; + } + + /// In-place addition + CUTLASS_HOST_DEVICE + Coord& operator+=(Coord const& b) { + for (int i = 0; i < N; ++i) { + idx[i] += b.idx[i]; + } + return *this; + } + + /// In-place subtraction + CUTLASS_HOST_DEVICE + Coord& operator-=(Coord const& b) { + for (int i = 0; i < N; ++i) { + idx[i] -= b.idx[i]; + } + return *this; + } + + /// In-place multiplication + CUTLASS_HOST_DEVICE + Coord& operator*=(Coord const& b) { + for (int i = 0; i < N; ++i) { + idx[i] *= b.idx[i]; + } + return *this; + } + + /// In-place division + 
CUTLASS_HOST_DEVICE + Coord& operator/=(Coord const& b) { + for (int i = 0; i < N; ++i) { + idx[i] /= b.idx[i]; + } + return *this; + } + + /// Member access operator + CUTLASS_HOST_DEVICE int& operator[](int dim) { return idx[dim]; } + + /// Member access operator + CUTLASS_HOST_DEVICE int const& operator[](int dim) const { return idx[dim]; } + + /// Computes the dot product of two Coord instances + template + CUTLASS_HOST_DEVICE T dot(Coord const& b, T sum) const { + for (int i = 0; i < N; ++i) { + sum += idx[i] * b.idx[i]; + } + return sum; + } + + /// Computes the dot product of two Coord instances + template + CUTLASS_HOST_DEVICE T dot(Coord const& b) const { + T sum = T(0); + for (int i = 0; i < N; ++i) { + sum += idx[i] * b.idx[i]; + } + return sum; + } + + /// Gets the index of a given Coord element + template + CUTLASS_HOST_DEVICE int& at() { + return idx[Dim]; + } + + /// Access via index; may limit unrolling potential + CUTLASS_HOST_DEVICE + int& at(int dim) { return idx[dim]; } + + /// Gets the index of a given Coord element + template + CUTLASS_HOST_DEVICE int const& at() const { + return idx[Dim]; + } + + /// Access via index; may limit unrolling potential + CUTLASS_HOST_DEVICE + int const& at(int dim) const { return idx[dim]; } + + /// Determines if two Coord<> objects are equal + CUTLASS_HOST_DEVICE + bool operator==(Coord const& b) const { + bool equal = true; + for (int i = 0; equal && i < N; ++i) { + equal = (idx[i] == b.idx[i]); + } + return equal; + } + + /// Not equal + CUTLASS_HOST_DEVICE + bool operator!=(Coord const& b) const { return !(*this == b); } + + /// Clamps a coordinate to a range specified by maximum and minimum values + CUTLASS_HOST_DEVICE + Coord& clamp(Coord const& max, Coord const& min = Coord()) { + for (int i = 0; i < N; ++i) { + idx[i] = __NV_STD_MAX(__NV_STD_MIN(idx[i], max.idx[i]), min.idx[i]); + } + return *this; + } + + /// Returns the product of all elements + CUTLASS_HOST_DEVICE + int count() const { + int product = 
idx[0]; + for (int i = 1; i < N; ++i) { + product *= idx[i]; + } + return product; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to make a 2-element coordinate +CUTLASS_HOST_DEVICE +Coord<1> make_Coord(int _0) { + int values[1] = {_0}; + return Coord<1>(values); +} + +/// Helper to make a 2-element coordinate +CUTLASS_HOST_DEVICE +Coord<2> make_Coord(int _0, int _1) { + int values[2] = {_0, _1}; + return Coord<2>(values); +} + +/// Helper to make a 3-element coordinate +CUTLASS_HOST_DEVICE +Coord<3> make_Coord(int _0, int _1, int _2) { + int values[3] = {_0, _1, _2}; + return Coord<3>(values); +} + +/// Helper to make a 4-element coordinate +CUTLASS_HOST_DEVICE +Coord<4> make_Coord(int _0, int _1, int _2, int _3) { + int values[4] = {_0, _1, _2, _3}; + return Coord<4>(values); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Getter +CUTLASS_HOST_DEVICE +Coord<2> get_Coord_hw(Coord<3> const& coord) { return make_Coord(coord[1], coord[2]); } + +/// Getter +CUTLASS_HOST_DEVICE +Coord<2> get_Coord_hw(Coord<4> const& coord) { return make_Coord(coord[1], coord[2]); } + +/// Getter +CUTLASS_HOST_DEVICE +Coord<3> get_Coord_hwc(Coord<4> const& coord) { return make_Coord(coord[1], coord[2], coord[3]); } + +/// Getter +CUTLASS_HOST_DEVICE +Coord<3> get_Coord_dhw(Coord<4> const& coord) { return make_Coord(coord[0], coord[1], coord[2]); } + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/core_io.h b/cutlass/core_io.h new file mode 100644 index 00000000..cceea4c0 --- /dev/null +++ b/cutlass/core_io.h @@ -0,0 +1,44 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#pragma once + +/*! \file + \brief Helpers for printing cutlass/core objects +*/ + +#pragma once + +#include +#include + +#include + +template +std::ostream& operator<<(std::ostream& out, cutlass::Coord const& coord) { + for (int i = 0; i < Rank; ++i) { + out << (i ? 
", " : "") << coord.idx[i]; + } + return out; +} diff --git a/cutlass/cutlass.h b/cutlass/cutlass.h new file mode 100644 index 00000000..1e428b16 --- /dev/null +++ b/cutlass/cutlass.h @@ -0,0 +1,73 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +/*! 
////////////////////////////////////////////////////////////////////////////////////////////////////

/// Library version, also available folded into a single integer (1.0.0 -> 100).
#define CUTLASS_MAJOR 1
#define CUTLASS_MINOR 0
#define CUTLASS_PATCH 0
#define CUTLASS_VERSION ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)

// Function qualifiers.
//   CUTLASS_HOST_DEVICE - function usable from host code and from device code
//   CUTLASS_DEVICE      - function usable from device code only
#ifdef __NVCC__
#define CUTLASS_HOST_DEVICE __forceinline__ __device__ __host__
#define CUTLASS_DEVICE __forceinline__ __device__
#elif defined(__CUDACC_RTC__)
#define CUTLASS_HOST_DEVICE __forceinline__ __device__
#define CUTLASS_DEVICE __forceinline__ __device__
#else
// Host-only compilers. The qualifier must still make the function `inline`: the headers define
// non-template free functions with it, and without `inline` every translation unit including
// such a header would emit its own external definition (a one-definition-rule violation that
// shows up as duplicate-symbol link errors).
#define CUTLASS_HOST_DEVICE inline
// CUTLASS_DEVICE is deliberately left undefined: using it is an error if not compiling device
// code.
#endif

// CUTLASS_PRAGMA_UNROLL / CUTLASS_PRAGMA_NO_UNROLL expand to an "unroll" / "unroll 1" pragma
// while compiling device code (__CUDA_ARCH__ defined) and to nothing otherwise.
#if defined(__CUDA_ARCH__)
#if defined(_MSC_VER)
#define CUTLASS_PRAGMA_UNROLL __pragma("unroll")
#define CUTLASS_PRAGMA_NO_UNROLL __pragma("unroll 1")
#else
#define CUTLASS_PRAGMA_UNROLL _Pragma("unroll")
#define CUTLASS_PRAGMA_NO_UNROLL _Pragma("unroll 1")
#endif
#else
#define CUTLASS_PRAGMA_UNROLL
#define CUTLASS_PRAGMA_NO_UNROLL
#endif

// Forwards to the standard assert macro; the including code must make `assert` available.
#define CUTLASS_ASSERT(x) assert(x)

namespace cutlass {

/// NVIDIA GPU Warp size
static const int kWarpSize = 32;

}  // namespace cutlass

////////////////////////////////////////////////////////////////////////////////////////////////////
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines Fragment, a statically-sized array for storing parts of matrices within a + thread's registers.
+*/ +#pragma once + +#include +#include +#include +#include + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup fragment_concept Fragment Concept +@{ + +\ref fragment_concept is a statically sized array for storing parts of tiles held by individual CUDA +threads. + +@par \ref fragment_concept + Types satisfying \ref fragment_concept define the following members + - Element - type of each access held within the fragment + - kElements - number of elements stored by the fragment + - clear() - overwrites the fragment storage with zeros + - Element & operator[](int i) - by-reference access of the ith element + - Element const & operator[](int i) const - const by-reference access of the ith element +@} +*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup fragment_iterator_concept Fragment Iterator Concept +@{ + +\ref fragment_iterator_concept provides structured access to the elements within a fragment with an +optional bitcast to the desired access type + +@par \ref fragment_iterator_concept + Types satisfying \ref fragment_iterator_concept define the following members + - AccessType& operator[](int i) - provides access to the ith element of the fragment + - AccessType& at(int d, int h, int w, int c) - applies \ref layout_concept to fragment and +provides access to element at (d, h, w, c) + +@} +*/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct StorageType { + typedef uint64_t Type; +}; +template <> +struct StorageType<4> { + typedef uint32_t Type; +}; +template <> +struct StorageType<2> { + typedef uint16_t Type; +}; +template <> +struct StorageType<1> { + typedef uint8_t Type; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +* @brief A template defining \ref 
fragment_concept +* @concept{fragment_concept} +*/ +template +struct Fragment : public AlignedStruct { + /// Make sure the alignment makes sense wrt the size of elements. + static_assert(kAlignment_ == 16 || kAlignment_ >= sizeof(Element_), "Alignment is too small"); + /// Alignment must be a power of two + static_assert(is_pow2::value, "Alignment must be a power of two"); + + /// This class. + typedef Fragment This_; + /// The element. + typedef Element_ Element; + /// The number of elements. + static int const kElements = kElements_; + + /// Clear a fragment. + CUTLASS_DEVICE void clear() { + // Avoid element-wise access for sub 32b element type + if (kAlignment_ >= 8 && (kElements * sizeof(Element)) % 8 == 0) { + uint64_t* ptr = reinterpret_cast(storage); + for (int i = 0; i < (kElements * sizeof(Element)) / 8; ++i) { + ptr[i] = uint64_t(0); + } + } else if (kAlignment_ >= 4 && (kElements * sizeof(Element)) % 4 == 0) { + uint32_t* ptr = reinterpret_cast(storage); + for (int i = 0; i < (kElements * sizeof(Element)) / 4; ++i) { + ptr[i] = uint32_t(0); + } + } else if (kAlignment_ >= 2 && (kElements * sizeof(Element)) % 2 == 0) { + uint16_t* ptr = reinterpret_cast(storage); + for (int i = 0; i < (kElements * sizeof(Element)) / 2; ++i) { + ptr[i] = uint16_t(0); + } + } else { + for (int i = 0; i < kElements; ++i) { + storage[i] = 0; + } + } + } + + /// The accessor. + CUTLASS_DEVICE Element& operator[](int i) { + assert(i < kElements_); + return reinterpret_cast(storage)[i]; + } + + /// The accessor. + CUTLASS_DEVICE Element const& operator[](int i) const { + assert(i < kElements_); + return reinterpret_cast(storage)[i]; + } + + private: + /// Storage type to use for Elements + typedef typename StorageType::Type StorageType; + + /// Number of elements in the storage + static int const kStorageCount = + (sizeof(Element_) * kElements_ + sizeof(StorageType) - 1) / sizeof(StorageType); + /// The storage. 
+ StorageType storage[kStorageCount]; + + /// Ensure that there's enough storage for all elements + static_assert(sizeof(StorageType) <= kAlignment_, "StorageType is too big for given alignment"); +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +* @brief A template defining \ref fragment_iterator_concept +* @concept{fragment_iterator_concept} +*/ +template +struct FragmentIterator { + /// This class. + typedef FragmentIterator This_; + /// The fragment. + typedef Fragment_ Fragment; + /// The number of iterations. + typedef Iterations_ Iterations; + /// The access type. + typedef AccessType_ AccessType; + + /// The element. + typedef typename Fragment::Element Element; + /// The number of elements per access. + static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element)); + /// The shape of the the fragment. + typedef typename ShapeMul >::Shape FragmentShape; + /// The linear strides for iterations. + typedef typename ShapeStrides::Shape Strides; + + /// Ctor. + template + CUTLASS_DEVICE FragmentIterator(OtherFragment_& fragment, int offset = 0) + : pointer(reinterpret_cast(&fragment[offset])) { + static_assert(OtherFragment_::kElements >= Fragment::kElements, ""); + } + + /// The accessor. + CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const { + int const imm = ComputeOffsetFromStrides::get(d, h, w, c); + return reinterpret_cast(pointer[imm]); + } + + /// The accessor. + CUTLASS_DEVICE AccessType& at(int d, int h, int w, int c = 0) { + int const imm = ComputeOffsetFromStrides::get(d, h, w, c); + return reinterpret_cast(pointer[imm]); + } + + /// The accessor. + CUTLASS_DEVICE AccessType const& operator[](int i) const { + return reinterpret_cast(pointer[i * kElementsPerAccess]); + } + + /// The accessor. + CUTLASS_DEVICE AccessType& operator[](int i) { + return reinterpret_cast(pointer[i * kElementsPerAccess]); + } + + /// Is the iterator valid? 
+ CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; } + + /// The pointer. + Element* pointer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct FragmentConstIterator { + /// This class. + typedef FragmentIterator This_; + /// The fragment. + typedef Fragment_ Fragment; + /// The number of iterations. + typedef Iterations_ Iterations; + /// The access type. + typedef AccessType_ AccessType; + + /// The element. + typedef typename Fragment::Element Element; + /// The number of elements per access. + static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element)); + /// The shape of the the fragment. + typedef typename ShapeMul >::Shape FragmentShape; + /// The linear strides for iterations. + typedef typename ShapeStrides::Shape IterationsStrides; + + /// Ctor. + template + CUTLASS_DEVICE FragmentConstIterator(OtherFragment_& fragment, int offset = 0) + : pointer(reinterpret_cast(&fragment[offset])) { + static_assert(OtherFragment_::kElements >= Fragment::kElements, ""); + } + /// Create from non-constant FragmentIterator + CUTLASS_DEVICE FragmentConstIterator( + FragmentIterator const& rhs_) + : pointer(reinterpret_cast(rhs_.offset)) {} + + /// The accessor. + CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const { + int const imm = ComputeOffsetFromStrides::get(d, h, w, c); + return reinterpret_cast(pointer[imm]); + } + + /// The accessor. + CUTLASS_DEVICE AccessType const& operator[](int i) const { + return reinterpret_cast(pointer[i * kElementsPerAccess]); + } + + /// Is the iterator valid? + CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; } + + /// The pointer. 
+ Element const* pointer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/fragment_load_store.h b/cutlass/fragment_load_store.h new file mode 100644 index 00000000..a7d272e9 --- /dev/null +++ b/cutlass/fragment_load_store.h @@ -0,0 +1,135 @@ +/*************************************************************************************************** + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace cutlass {

////////////////////////////////////////////////////////////////////////////////////////////////////

// NOTE(review): the template parameter lists and the specialization arguments of every
// declaration below are missing from this copy of the file. The code is reproduced as found;
// restore the lists from the upstream header before relying on it.

/// Primary template of the fragment loader. It is empty; the two definitions that follow are
/// presumably its partial specializations (their arguments are not visible here -- confirm).
template
struct FragmentLoad {};

/// Loader whose access type is the fragment element itself: loading is delegated to the
/// element's own load() member, which receives the address of pointer[offset] and kStride.
template
struct FragmentLoad {
  /// The output type.
  typedef FragmentElement_ AccessType;

  /// The load function.
  static CUTLASS_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) {
    value.load(&pointer[offset], kStride);
  }
};

/// Loader whose access type is obtained from Vectorize: loading is delegated to Load::load with
/// the same value, pointer and offset.
template
struct FragmentLoad {
  /// The output type.
  typedef typename Vectorize::Type AccessType;

  /// The load function.
  static CUTLASS_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) {
    Load::load(value, pointer, offset);
  }
};

/// Primary template of the fragment storer. It is empty; the two definitions that follow are
/// presumably its partial specializations (their arguments are not visible here -- confirm).
template
struct FragmentStore {};

/// Storer whose access type is the fragment element itself: storing is delegated to the
/// element's own store() member, which receives the address of pointer[offset] and kStride.
template
struct FragmentStore {
  /// The input type.
  typedef FragmentElement_ AccessType;

  /// The store function.
  static CUTLASS_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) {
    value.store(&pointer[offset], kStride);
  }
};

/// Storer whose access type is obtained from Vectorize: storing is delegated to Store::store
/// with the same value, pointer and offset.
template
struct FragmentStore {
  /// The input type.
  typedef typename Vectorize::Type AccessType;

  /// The store function.
  static CUTLASS_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) {
    Store::store(value, pointer, offset);
  }
};

////////////////////////////////////////////////////////////////////////////////////////////////////

}  /// namespace cutlass
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines multiply-add operations on fragments within a thread. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct FragmentMultiplyAdd { + /// The shape of the instruction. 
+ typedef Shape<1, 1, 1, 1> InstructionShape; + /// The type for A. + typedef Scalar_ ScalarA; + /// The type for B. + typedef Scalar_ ScalarB; + /// The type for C and D. + typedef Scalar_ ScalarC; + + /// Ctor. + CUTLASS_DEVICE FragmentMultiplyAdd() {} + + /// Multiply : d = a*b. + template + CUTLASS_DEVICE void multiply(Scalar_ a, Fragment_ const& b, Fragment_& d) { + for (int j = 0; j < Fragment_::kElements; ++j) { + d[j] = a * b[j]; + } + } + + /// Multiply : d = a*b + c. + template + CUTLASS_DEVICE void multiply_add(Scalar_ a, + Fragment_ const& b, + Fragment_ const& c, + Fragment_& d) { + for (int j = 0; j < Fragment_::kElements; ++j) { + d[j] = a * b[j] + c[j]; + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) +template <> +struct FragmentMultiplyAdd { + /// The shape of the instruction. + typedef Shape<1, 1, 1, 1> InstructionShape; + /// The type for A. + typedef half ScalarA; + /// The type for B. + typedef half ScalarB; + /// The type for C and D. + typedef half ScalarC; + + /// Ctor. + CUTLASS_DEVICE FragmentMultiplyAdd() {} + + /// Multiply : d = a*b. + template + CUTLASS_DEVICE void multiply(half a, Fragment_ const& b, Fragment_& d) { +#if defined(__CUDACC__) && __CUDA_ARCH__ >= 530 + // The input. + __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]); + // The output. + __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]); + + // Assemble a half2 from a. + __half2 const a_half2 = __half2half2(a); + + for (int i = 0; i < Fragment_::kElements / 2; ++i) { + d_half2[i] = __hmul2(a_half2, b_half2[i]); + } +#endif + } + + /// Multiply : d = a*b + c. + template + CUTLASS_DEVICE void multiply_add(half a, Fragment_ const& b, Fragment_ const& c, Fragment_& d) { +#if defined(__CUDACC__) && __CUDA_ARCH__ >= 530 + // The inputs. 
+ __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]); + __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]); + // The output. + __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]); + + // Assemble a half2 from a. + __half2 const a_half2 = __half2half2(a); + + for (int i = 0; i < Fragment_::kElements / 2; ++i) { + d_half2[i] = __hfma2(a_half2, b_half2[i], c_half2[i]); + } +#endif + } +}; + +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/block_loader.h b/cutlass/gemm/block_loader.h deleted file mode 100644 index 0c5b3faa..00000000 --- a/cutlass/gemm/block_loader.h +++ /dev/null @@ -1,162 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * block-wide tile-loading abstractions - */ - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * load_algorithm - ******************************************************************************/ - -/** - * \brief Enumeration of matrix loading algorithms - */ -struct load_algorithm -{ - /// \brief Enumerants. See corresponding tag types. - enum kind_t - { - CongruousCopy = 0, - CrosswiseCopy = 1, - }; - - /** - * \brief Generic tag - */ - template - struct any_tag : nv_std::integral_constant {}; - - /** - * \brief Copy from a global matrix that is row-major in relation - * to the local row-major tile - */ - typedef any_tag contiguous_tag_t; - - /** - * \brief Copy from a global matrix that is column-major in relation - * to the local row-major tile - */ - typedef any_tag crosswise_tag_t; - -}; - - -/****************************************************************************** - * block_loader - ******************************************************************************/ - -/** - * \brief A three-phase data loading abstraction (prefetch, commit, and - * advance) for iterating over ranges of block-wide matrix tiles. - * - * Each iteration sequence produces a KxL (height-by-width) block-wide tile of - * value_t in shared memory. 
The layout of the shared - * block-wide tile is a row-major (L-major) tiling of dp_vector_t items, which are - * themselves column-major (K-major) vectors of value_t. Its dimensions are: - * K = BlockDpVectorsK * (sizeof(dp_vector_t) / sizeof(value_t) - * L = BlockDpVectorsL - * - * NB: This generic class is not directly constructible. Architecture- and - * algorithm-specific template specializations will provide the API - * functionality prescribed here. - * - */ -template < - int BlockThreads, ///< Number of threads in each thread block (blockDim.x) - int BlockDpVectorsK, ///< Extent of block-wide tile in dp_vector_t along the K-axis (height) - int BlockDpVectorsL, ///< Extent of block-wide tile in dp_vector_t along the L-axis (width) - typename value_t, ///< Input matrix value type - int LeadingDimAlignBytes, ///< Byte alignment of input matrix leading dimension - bool AllowRaggedTiles, ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions - typename dp_vector_t, ///< Dot-product vector type along the K-axis - load_algorithm::kind_t LoadAlgorithm> ///< Algorithm for loading a shared tile of KxL matrix data -struct block_loader -{ - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - block_loader( - value_t *d_matrix, ///< Pointer to input matrix - int matrix_values_l, ///< Extent of the input matrix in value_t along the L-axis - int matrix_values_stride_k, ///< Distance in value_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_values_stride_l, ///< Distance in value_t within pitched-linear memory between successive coordinates along the L-axis - int2 block_begin_item_coords, ///< Thread block's starting value_t coordinates (l, k) within the input matrix - int block_end_item_k); ///< Thread block's ending coordinate (k) within 
the input matrix (one-past) - - //------------------------------------------------------------------------- - // Loader API - //------------------------------------------------------------------------- - - /** - * Request the current block-wide tile - */ - void request(); - - - /** - * Advance the loader to the next block-wide tile in the K-axis - */ - void next(); - - - /** - * Commit the previously-requested block-wide tile to shared memory - * - * NB: To facilitate padding for avoiding shared memory bank conflicts, we - * allow the row stride _BlockDpVectorsL to be arbitrarily bigger than the - * tile width BlockDpVectorsL. - */ - template - void commit( - dp_vector_t (&scratch_tile)[BlockDpVectorsK][_BlockDpVectorsL]); - -}; - - -} // namespace gemm -} // namespace cutlass - - -/****************************************************************************** - * Tail-include specializations that adhere to the block_loader API - ******************************************************************************/ - -#include "block_loader_crosswise.h" -#include "block_loader_congruous_dp1.h" -#include "block_loader_congruous_idp4.h" diff --git a/cutlass/gemm/block_loader_congruous_dp1.h b/cutlass/gemm/block_loader_congruous_dp1.h deleted file mode 100644 index 80dca26d..00000000 --- a/cutlass/gemm/block_loader_congruous_dp1.h +++ /dev/null @@ -1,406 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Tile-loading abstraction for thread blocks - */ - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * block_loader (CongruousCopy + dp1 specialization) - ******************************************************************************/ - -/** - * \brief A three-phase data loading abstraction (prefetch, commit, and - * advance) for iterating over ranges of block-wide matrix tiles. - * (CongruousCopy + dp1 specialization) - * - * Each iteration sequence produces a KxL (height-by-width) block-wide tile of - * value_t in shared memory. 
The layout of the shared block-wide tile is - * a row-major (L-major) tiling of singleton "dp1" dp_vector_t items, where - * dp_vector_t == value_t. Its dimensions are: - * K = BlockDpVectorsK - * L = BlockDpVectorsL - * - * The data is copied from a corresponding tile of global matrix data whose - * layout of value_t is also L-major. This constitutes a CongruousCopy - * between the L-major global tile and the L-major shared tile. - * - * NB: Because they are "dp1" singletons, the K-major orientation of - * dp_vector_t in shared memory is irrelevant, and the L-major global and - * shared tile layouts are perfectly congruous. As a result, we can increase - * the granularity of data transfer via vectorization of loads and stores - * without any intermediate {dis|re}assembly. - * - * NB: Consecutive threads within a block are mapped in L-major - * fashion across a first-set of LDG-vectors of dp_vector_t (value_t) within - * their global tile. Successive sets of LDG-vectors are then strip-mined - * as necessary down the K-axis. These discontiguous LDG-vectors comprise the - * thread's "slice" of the block-wide tile. 
- */ -template < - int BlockThreads, ///< Number of threads in each thread block (blockDim.x) - int BlockDpVectorsK, ///< Extent of block-wide tile in dp_vector_t along the K-axis (height) - int BlockDpVectorsL, ///< Extent of block-wide tile in dp_vector_t along the L-axis (width) - typename value_t, ///< Input matrix value type - int LeadingDimAlignBytes, ///< Byte alignment of input matrix leading dimension - bool AllowRaggedTiles ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions -> -struct block_loader< - BlockThreads, - BlockDpVectorsK, - BlockDpVectorsL, - value_t, - LeadingDimAlignBytes, - AllowRaggedTiles, - value_t, ///< Dot-product vector type along the K-axis (dp1 specialization) - load_algorithm::CongruousCopy> ///< Algorithm for loading a shared tile of KxL matrix data (CongruousCopy specialization) -{ - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - /// Dot-product vector type along the K-axis - typedef value_t dp_vector_t; - - enum - { - /// Number of value_t in a dp_vector_t - DpVectorItems = divide_assert::value, - - /// Number of dp_vector_t in a block-wide tile - BlockDpVectors = BlockDpVectorsK * BlockDpVectorsL, - - /// Number of dp_vector_t in a thread-tile - ThreadDpVectors = divide_assert::value, - }; - - /// Data movement type, coarsened by LeadingDimAlignBytes, capped by the - /// smaller of either ThreadDpVectors or BlockDpVectorsL - typedef io_vector< - dp_vector_t, - __NV_STD_MIN(ThreadDpVectors, BlockDpVectorsL), - LeadingDimAlignBytes> - ldg_vector_t; - - enum - { - /// Number of dp_vector_t per ldg_vector_t - LdgVectorDpVectors = ldg_vector_t::VectorItems, - - /// Number of value_t per ldg_vector_t - LdgVectorItems = LdgVectorDpVectors * DpVectorItems, - - - - /// Total number of ldg_vector_t within each block-wide tile - BlockLdgVectors = 
divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along L-axis - BlockLdgVectorsL = divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along K-axis - BlockLdgVectorsK = BlockDpVectorsK, - - - - /// Number of ldg_vector_t within each thread-tile - ThreadLdgVectors = divide_assert::value, - - /// Extent of the thread tile in ldg_vector_t along L-axis - ThreadLdgVectorsL = __NV_STD_MAX(1, (BlockLdgVectorsL / BlockThreads)), - - /// Extent of the thread tile in ldg_vector_t along K-axis - ThreadLdgVectorsK = divide_assert::value, - - - - /// Number of ldg_vector_t within each stripmine-tile - StripmineLdgVectors = BlockThreads, - - /// Extent of the stripmine tile in ldg_vector_t along L-axis - StripmineLdgVectorsL = __NV_STD_MIN(BlockLdgVectorsL, StripmineLdgVectors), - - /// Extent of the stripmine tile in ldg_vector_t along K-axis - StripmineLdgVectorsK = divide_assert::value, - - - - /// Alignment in dp_vector_t along L needed for committing prefetch - AlignmentDpVectorsL = LdgVectorDpVectors, - }; - - /// Predicate bit vector - typedef uint64_t predicate_mask_t; - - - //------------------------------------------------------------------------- - // Assert assumptions - //------------------------------------------------------------------------- - - static_assert( - (ThreadLdgVectors <= sizeof(predicate_mask_t) * 8), - "Predicate mask type does not contain enough bits for encoding load predicates"); - - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Input pointer to matrix in ldg_vector_t - ldg_vector_t *d_matrix_ldgvecs; - - /// Extent of the input matrix in ldg_vector_t along the L-axis - int matrix_ldgvecs_l; - - /// Thread block's ending ldg_vector_t coordinate (k) within the input matrix (one-past) - int block_end_ldgvec_k; - - /// Predicate bits for guarding ldg_vector_t loads within 
"whole-k" block-wide tiles - predicate_mask_t guard; - - /// Predicate bits for guarding ldg_vector_t loads within the final block-wide "residue" tile - predicate_mask_t residue_guard; - - /// Iteration span in "whole-k" block-wide tiles - int wholek_tiles_remaining; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_ldgvec_stride_k; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the L-axis - int matrix_ldgvec_stride_l; - - /// ldg_vector_t coordinates (l, k) of thread-tile within the block-wide tile - int2 block_thread_ldgvec_coords; - - /// Thread-wide tile of prefetch data - ldg_vector_t thread_tile[ThreadLdgVectorsK][ThreadLdgVectorsL]; - - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_loader( - value_t *d_matrix_items, ///< Input pointer to matrix in value_t - int matrix_items_l, ///< Extent of the input matrix in value_t along the L-axis - int matrix_items_stride_k, ///< Distance in value_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_items_stride_l, ///< Distance in value_t within pitched-linear memory between successive coordinates along the L-axis - int2 matrix_block_item_coords, ///< value_t coordinates (l, k) of first block-wide tile within the input matrix - int block_end_item_k) ///< Thread block's ending coordinate (k) within the input matrix (one-past) - : - block_end_ldgvec_k(block_end_item_k), - guard(0), - residue_guard(0) - { - matrix_ldgvecs_l = matrix_items_l / LdgVectorItems; - matrix_ldgvec_stride_k = matrix_items_stride_k / LdgVectorItems, - matrix_ldgvec_stride_l = matrix_items_stride_l; - - // ldg_vector_t coordinates (l, k) of thread-tile within the block-wide tile - block_thread_ldgvec_coords = 
make_int2( - threadIdx.x % BlockLdgVectorsL, // l-coordinate - threadIdx.x / BlockLdgVectorsL); // k-coordinate - - // ldg_vector_t coordinates (l, k) of first block-wide tile within the input matrix - int2 matrix_block_ldgvec_coords = make_int2( - matrix_block_item_coords.x / LdgVectorItems, // l-coordinate - matrix_block_item_coords.y); // k-coordinate - - // Iteration span in ldg_vector_t - int span_ldgvec_k = (block_end_item_k - matrix_block_item_coords.y); - - - - // ldg_vector_t coordinates (l, k) of first thread-tile tile within the input matrix - int2 matrix_thread_ldgvec_coords = make_int2( - block_thread_ldgvec_coords.x + matrix_block_ldgvec_coords.x, - block_thread_ldgvec_coords.y + matrix_block_ldgvec_coords.y); - - // Iteration range in "whole-k" block-wide tiles - wholek_tiles_remaining = span_ldgvec_k / BlockLdgVectorsK; - - // Extent of final residue-tile in ldg_vector_t along K-axis - int residue_ldgvecs_k = span_ldgvec_k % BlockLdgVectorsK; - - // Initialize I/O predicates - if (AllowRaggedTiles) - { - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - int block_ldgvec_k = block_thread_ldgvec_coords.y + (thread_ldgvec_k * StripmineLdgVectorsK); - - // Whether block_ldgvec_coords.y is valid in the final residue tile - predicate_mask_t valid_k = (block_ldgvec_k < residue_ldgvecs_k); - - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - // Whether block_ldgvec_coords.x is valid any block-wide tile - predicate_mask_t valid_l = (matrix_block_ldgvec_coords.x + block_ldgvec_l < matrix_ldgvecs_l); - - // Linear index of ldg_vector_t load - int ldgvec_idx = thread_ldgvec_l + (thread_ldgvec_k * ThreadLdgVectorsL); - - // Set predicate 
guard bits - guard |= (valid_l << ldgvec_idx); - residue_guard |= ((valid_l & valid_k) << ldgvec_idx); - } - } - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - - // Update the input pointer to be matrix_thread_ldgvec_coords - this->d_matrix_ldgvecs = - reinterpret_cast(d_matrix_items) + - (matrix_thread_ldgvec_coords.y * matrix_ldgvec_stride_k) + - (matrix_thread_ldgvec_coords.x * matrix_ldgvec_stride_l); - } - - - //------------------------------------------------------------------------- - // Loader API - //------------------------------------------------------------------------- - - /** - * Request the current block-wide tile - */ - inline __device__ - void request() - { - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - // Linear index of ldg_vector_t load - int ldgvec_idx = (thread_ldgvec_k * ThreadLdgVectorsL) + thread_ldgvec_l; - - // Unpack predicate guard - predicate_mask_t valid = ((guard >> ldgvec_idx) & 1); - - if (!AllowRaggedTiles || valid) - { - // Perform load - thread_tile[thread_ldgvec_k][thread_ldgvec_l].load( - d_matrix_ldgvecs + - (thread_ldgvec_k * StripmineLdgVectorsK * matrix_ldgvec_stride_k) + - (thread_ldgvec_l * StripmineLdgVectorsL * matrix_ldgvec_stride_l)); - } - else - { - // Zero-initialize - #pragma unroll - for (int dpvec = 0; dpvec < LdgVectorDpVectors; ++dpvec) - thread_tile[thread_ldgvec_k][thread_ldgvec_l].buff[dpvec] = 0; - } - } - } - } - - - /** - * Advance the loader to the next block-wide tile in the K-axis - */ - inline __device__ - void next() - { - d_matrix_ldgvecs += (matrix_ldgvec_stride_k * BlockLdgVectorsK); - - if (AllowRaggedTiles) - { - 
--wholek_tiles_remaining; - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - } - - - /** - * Commit the previously-requested block-wide tile to shared memory - * - * NB: To facilitate padding for avoiding shared memory bank conflicts, we - * allow the row stride SmemDpVectorsL to be arbitrarily bigger than the - * tile width BlockDpVectorsL. - */ - template - inline __device__ - void commit( - dp_vector_t (&scratch_tile)[BlockDpVectorsK][SmemDpVectorsL]) - { - static_assert(SmemDpVectorsL >= BlockDpVectorsL, "Row stride must be >= tile width."); - - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - int block_ldgvec_k = block_thread_ldgvec_coords.y + (thread_ldgvec_k * StripmineLdgVectorsK); - - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - thread_tile[thread_ldgvec_k][thread_ldgvec_l].store( - &scratch_tile[block_ldgvec_k][block_ldgvec_l * LdgVectorDpVectors]); - } - } - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/block_loader_congruous_idp4.h b/cutlass/gemm/block_loader_congruous_idp4.h deleted file mode 100644 index 686da1db..00000000 --- a/cutlass/gemm/block_loader_congruous_idp4.h +++ /dev/null @@ -1,544 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Tile-loading abstraction for thread blocks - */ - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * block_loader (CongruousCopy + idp4 specialization) - ******************************************************************************/ - -/** - * \brief A three-phase data loading abstraction (prefetch, commit, and - * advance) for iterating over ranges of block-wide matrix tiles. - * (CongruousCopy + idp4 specialization) - * - * Each iteration sequence produces a KxL (height-by-width) block-wide tile of - * value_t in shared memory. The layout of the shared block-wide tile is - * a row-major (L-major) tiling of int32_t dp_vector_t, which are themselves - * column-major (K-major) vectors of int8_t value_t. Its dimensions are: - * K = BlockDpVectorsK * (sizeof(dp_vector_t) / sizeof(value_t) - * L = BlockDpVectorsL - * - * The data is copied from a corresponding tile of global matrix data whose - * layout of value_t is also L-major. This constitutes a CongruousCopy between - * the L-major global tile and the L-major shared tile. - * - * NB: The K-major value_t in shared dp_vector_t are imperfectly congruous - * with the L-major value_t in global memory. As a result, the granularity - * of data transfer is a "dp-square" of (DpVectorItems * DpVectorItems) values - * that must be transposed from L-oriented dp_vector_t to K-oriented - * dp_vector_t prior to commitment. - * - * NB: Consecutive threads within a block are mapped in L-major - * fashion across a first-set of squares within their global tile. Successive - * sets of squares are then strip-mined as necessary down the K-axis. These - * discontiguous squares comprise the thread's "slice" of the block-wide tile. 
- */ -template < - int BlockThreads, ///< Number of threads in each thread block (blockDim.x) - int _BlockDpVectorsK, ///< Extent of block-wide tile in dp_vector_t along the K-axis (height) - int _BlockDpVectorsL, ///< Extent of block-wide tile in dp_vector_t along the L-axis (width) - int LeadingDimAlignBytes, ///< Byte alignment of input matrix leading dimension - bool AllowRaggedTiles ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions -> -struct block_loader< - BlockThreads, - _BlockDpVectorsK, - _BlockDpVectorsL, - int8_t, ///< Input matrix value type (idp4 specialization) - LeadingDimAlignBytes, - AllowRaggedTiles, - int32_t, ///< Dot-product vector type along the K-axis (idp4 specialization) - load_algorithm::CongruousCopy> ///< Algorithm for loading a shared tile of KxL matrix data (CrosswiseCopy specialization) -{ - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - /// Input matrix value type - typedef int8_t value_t; - - /// Dot-product vector type along the K-axis - typedef int32_t dp_vector_t; - - enum - { - /// Number of value_t in a dp_vector_t - DpVectorItems = divide_assert::value, - - /// Number of dp_vector_t in a block-wide tile - BlockDpVectors = _BlockDpVectorsK * _BlockDpVectorsL, - - /// Number of dp_vector_t in a thread-tile - ThreadDpVectors = divide_assert::value, - - /// Number of dp_vector_t in a dp-square - SquareDpVectors = DpVectorItems, - - /// Number of dp-square tiles in a thread-tile - ThreadSquares = divide_assert::value, - - /// Extent of block-wide tile in transposed dp_vector_t along the K-axis (height) - BlockTransDpVectorsK = _BlockDpVectorsK * DpVectorItems, - - /// Extent of block-wide tile in transposed dp_vector_t along the L-axis (height) - BlockTransDpVectorsL = divide_assert<_BlockDpVectorsL, DpVectorItems>::value, - - - }; - - /// 
Load-from-global data movement type, coarsened by LeadingDimAlignBytes, capped by the - /// smaller of either ThreadSquares or BlockTransDpVectorsL - typedef io_vector< - dp_vector_t, - __NV_STD_MIN(ThreadSquares, BlockTransDpVectorsL), - LeadingDimAlignBytes> - ldg_vector_t; - - /// Store-to-shared data movement type equivalent to a dp-square - typedef io_vector< - dp_vector_t, - SquareDpVectors> - sts_vector_t; - - enum - { - /// Number of dp_vector_t per ldg_vector_t - LdgVectorDpVectors = ldg_vector_t::VectorItems, - - /// Number of value_t per ldg_vector_t - LdgVectorItems = LdgVectorDpVectors * DpVectorItems, - - - - /// Total number of ldg_vector_t within each block-wide tile - BlockLdgVectors = divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along L-axis - BlockLdgVectorsL = divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along K-axis - BlockLdgVectorsK = BlockTransDpVectorsK, - - - - /// Number of ldg_vector_t within each thread-tile - ThreadLdgVectors = divide_assert::value, - - /// Extent of the thread tile in ldg_vector_t along L-axis - ThreadLdgVectorsL = __NV_STD_MAX(1, (BlockLdgVectorsL / BlockThreads)), - - /// Extent of the thread tile in ldg_vector_t along K-axis - ThreadLdgVectorsK = divide_assert::value, - - /// Extent of the thread tile in dp-square tiles along K-axis - ThreadSquaresK = divide_assert::value, - - - - /// Number of ldg_vector_t within each stripmine-tile - StripmineLdgVectors = BlockThreads * SquareDpVectors, - - /// Extent of the stripmine tile in ldg_vector_t along L-axis - StripmineLdgVectorsL = __NV_STD_MIN(BlockLdgVectorsL, BlockThreads), - - /// Extent of the stripmine tile in ldg_vector_t along K-axis - StripmineLdgVectorsK = divide_assert::value, - - /// Extent of the stripmine tile in dp-square tiles along K-axis - StripmineSquaresK = divide_assert::value, - - - - /// Alignment in dp_vector_t along L needed for committing prefetch - AlignmentDpVectorsL = 
LdgVectorDpVectors, - }; - - /// Predicate mask type - typedef uint32_t predicate_mask_t; - - - //------------------------------------------------------------------------- - // Assert assumptions - //------------------------------------------------------------------------- - - static_assert((LeadingDimAlignBytes >= 4) && (LeadingDimAlignBytes % 4 == 0), - "Alignment for matrix operands to IGEMM must be a multiple of 4 bytes."); - - static_assert( - (ThreadLdgVectors <= sizeof(predicate_mask_t) * 8), - "Predicate mask type does not contain enough bits for encoding load predicates"); - - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Input pointer to matrix in ldg_vector_t - ldg_vector_t *d_matrix_ldgvecs; - - /// Extent of the input matrix in ldg_vector_t along the L-axis - int matrix_ldgvecs_l; - - /// Thread block's ending ldg_vector_t coordinate (k) within the input matrix (one-past) - int block_end_ldgvec_k; - - /// Predicate bits for guarding ldg_vector_t loads within "whole-k" block-wide tiles - predicate_mask_t guard; - - /// Predicate bits for guarding ldg_vector_t loads within the final block-wide "residue" tile - predicate_mask_t residue_guard; - - /// Iteration span in "whole-k" block-wide tiles - int wholek_tiles_remaining; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_ldgvec_stride_k; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the L-axis - int matrix_ldgvec_stride_l; - - /// ldg_vector_t coordinates (l, k) of thread-tile within the block-wide tile - int2 block_thread_ldgvec_coords; - - /// Thread-wide tile of prefetch data - ldg_vector_t thread_tile[ThreadSquaresK][SquareDpVectors][ThreadLdgVectorsL]; - - - - //------------------------------------------------------------------------- - // 
Utility methods - //------------------------------------------------------------------------- - - - /** - * \brief Byte-permute. Pick four arbitrary bytes from two 32-bit registers, and reassemble them into a 32-bit destination register. For SM2.0 or later. - * - * \par - * The bytes in the two source registers \p a and \p b are numbered from 0 to 7: - * {\p b, \p a} = {{b7, b6, b5, b4}, {b3, b2, b1, b0}}. For each of the four bytes - * {b3, b2, b1, b0} selected in the return value, a 4-bit selector is defined within - * the four lower "nibbles" of \p index: {\p index } = {n7, n6, n5, n4, n3, n2, n1, n0} - * - * \par Snippet - * The code snippet below illustrates byte-permute. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) - * { - * int a = 0x03020100; - * int b = 0x07060504; - * int index = 0x00007531; - * - * int selected = prmt(a, b, index); // 0x07050301 - * - * \endcode - * - */ - inline __device__ - int32_t prmt(int32_t a, int32_t b, unsigned int index) - { - int ret; - asm volatile("prmt.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(a), "r"(b), "r"(index)); - return ret; - } - - - /** - * Convert a "dp-square" from L-major to K-major - */ - inline __device__ - void transpose_dp_square(dp_vector_t (&dp_square)[SquareDpVectors]) - { - // Transpose dp_vector_t squares - int32_t y = prmt(dp_square[0], dp_square[1], 0x00007362); - int32_t w = prmt(dp_square[2], dp_square[3], 0x00007362); - int32_t x = prmt(dp_square[0], dp_square[1], 0x00005140); - int32_t z = prmt(dp_square[2], dp_square[3], 0x00005140); - - dp_square[0] = prmt(x, z, 0x00005410); - dp_square[1] = prmt(x, z, 0x00007632); - dp_square[2] = prmt(y, w, 0x00005410); - dp_square[3] = prmt(y, w, 0x00007632); - } - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_loader( - value_t *d_matrix_items, ///< Input 
pointer to matrix in value_t - int matrix_items_l, ///< Extent of the input matrix in value_t along the L-axis - int matrix_items_stride_k, ///< Distance in value_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_items_stride_l, ///< Distance in value_t within pitched-linear memory between successive coordinates along the L-axis - int2 matrix_block_item_coords, ///< value_t coordinates (l, k) of first block-wide tile within the input matrix - int block_end_item_k) ///< Thread block's ending coordinate (k) within the input matrix (one-past) - : - block_end_ldgvec_k(block_end_item_k), - guard(0), - residue_guard(0) - { - matrix_ldgvecs_l = matrix_items_l / LdgVectorItems; - matrix_ldgvec_stride_k = matrix_items_stride_k / LdgVectorItems, - matrix_ldgvec_stride_l = matrix_items_stride_l; - - // ldg_vector_t coordinates (l, k) of thread-tile within the block-wide tile - block_thread_ldgvec_coords = make_int2( - threadIdx.x % BlockLdgVectorsL, // l-coordinate - (threadIdx.x / BlockLdgVectorsL) * SquareDpVectors); // k-coordinate - - // ldg_vector_t coordinates (l, k) of first block-wide tile within the input matrix - int2 matrix_block_ldgvec_coords = make_int2( - matrix_block_item_coords.x / LdgVectorItems, // l-coordinate - matrix_block_item_coords.y); // k-coordinate - - // Iteration span in ldg_vector_t - int span_ldgvec_k = (block_end_item_k - matrix_block_item_coords.y); - - - - // ldg_vector_t coordinates (l, k) of first thread-tile tile within the input matrix - int2 matrix_thread_ldgvec_coords = make_int2( - block_thread_ldgvec_coords.x + matrix_block_ldgvec_coords.x, - block_thread_ldgvec_coords.y + matrix_block_ldgvec_coords.y); - - // Iteration range in "whole-k" block-wide tiles - wholek_tiles_remaining = span_ldgvec_k / BlockLdgVectorsK; - - // Extent of final residue-tile in ldg_vector_t along K-axis - int residue_ldgvecs_k = span_ldgvec_k % BlockLdgVectorsK; - - // Initialize I/O predicates - if (AllowRaggedTiles) 
- { - // Iterate through rows of squares in thread tile - #pragma unroll - for (int thread_square_k = 0; thread_square_k < ThreadSquaresK; ++thread_square_k) - { - // Iterate through rows of dp_vector_t in each square - #pragma unroll - for (int square_dpvec = 0; square_dpvec < SquareDpVectors; ++square_dpvec) - { - // ldg_vector_t K-coordinate in block-wide tile (K-axis strip-mining of ldg_vector_t within block-tile) - int block_ldgvec_k = - block_thread_ldgvec_coords.y + - (thread_square_k * StripmineLdgVectorsK) + - square_dpvec; - - // Whether block_ldgvec_coords.y is valid in the final residue tile - predicate_mask_t valid_k = (block_ldgvec_k < residue_ldgvecs_k); - - // L-axis strip-mining of block-tile - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - // ldg_vector_t L-coordinate in block-wide tile (L-axis strip-mining of ldg_vector_t within block-tile) - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - // Whether block_ldgvec_coords.x is valid any block-wide tile - predicate_mask_t valid_l = (matrix_block_ldgvec_coords.x + block_ldgvec_l < matrix_ldgvecs_l); - - // Linear index of ldg_vector_t load - int ldgvec_idx = - (thread_square_k * SquareDpVectors * ThreadLdgVectorsL) + - (square_dpvec * ThreadLdgVectorsL) + - thread_ldgvec_l; - - // Set predicate guard bits - guard |= (valid_l << ldgvec_idx); - residue_guard |= ((valid_l & valid_k) << ldgvec_idx); - } - } - } - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - - // Update the input pointer to be matrix_thread_ldgvec_coords - this->d_matrix_ldgvecs = - reinterpret_cast(d_matrix_items) + - (matrix_thread_ldgvec_coords.y * matrix_ldgvec_stride_k) + - (matrix_thread_ldgvec_coords.x * matrix_ldgvec_stride_l); - } - - - //------------------------------------------------------------------------- - // Loader API - 
//------------------------------------------------------------------------- - - /** - * Request the current block-wide tile - */ - inline __device__ - void request() - { - // Each thread iterates through the ldg_vector_t in its thread tile - - // Iterate through rows of squares in thread tile - #pragma unroll - for (int thread_square_k = 0; thread_square_k < ThreadSquaresK; ++thread_square_k) - { - // Iterate through rows of dp_vector_t in each square - #pragma unroll - for (int square_dpvec = 0; square_dpvec < SquareDpVectors; ++square_dpvec) - { - // Iterate through ldg_vector_t in each row - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - // Linear index of ldg_vector_t load - int ldgvec_idx = - (thread_square_k * SquareDpVectors * ThreadLdgVectorsL) + - (square_dpvec * ThreadLdgVectorsL) + - thread_ldgvec_l; - - // Unpack predicate guard - predicate_mask_t valid = ((guard >> ldgvec_idx) & 1); - - if (!AllowRaggedTiles || valid) - { - // Perform load - thread_tile[thread_square_k][square_dpvec][thread_ldgvec_l].load( - d_matrix_ldgvecs + - (((thread_square_k * StripmineLdgVectorsK) + square_dpvec) * matrix_ldgvec_stride_k) + - (thread_ldgvec_l * StripmineLdgVectorsL * matrix_ldgvec_stride_l)); - } - else - { - // Zero-initialize - #pragma unroll - for (int dpvec = 0; dpvec < LdgVectorDpVectors; ++dpvec) - thread_tile[thread_square_k][square_dpvec][thread_ldgvec_l].buff[dpvec] = 0; - } - } - } - } - } - - - /** - * Advance the loader to the next block-wide tile in the K-axis - */ - inline __device__ - void next() - { - d_matrix_ldgvecs += (matrix_ldgvec_stride_k * BlockLdgVectorsK); - - if (AllowRaggedTiles) - { - --wholek_tiles_remaining; - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - } - - - /** - * Commit the previously-requested block-wide tile to shared memory - * - * NB: To facilitate padding for avoiding 
shared memory bank conflicts, we - * allow the row stride SmemDpVectorsL to be arbitrarily bigger than the - * tile width BlockDpVectorsL. - */ - template - inline __device__ - void commit( - dp_vector_t (&scratch_tile)[_BlockDpVectorsK][SmemDpVectorsL]) - { - static_assert(SmemDpVectorsL >= _BlockDpVectorsL, "Row stride must be >= tile width."); - - // Square K-coordinate of thread tile in block-wide tile - int block_thread_square_k = block_thread_ldgvec_coords.y / SquareDpVectors; - - // Iterate through rows of squares in thread tile - #pragma unroll - for (int thread_square_k = 0; thread_square_k < ThreadSquaresK; ++thread_square_k) - { - // Square K-coordinate in block-wide tile (K-axis strip-mining of squares within block-tile) - int block_square_k = block_thread_square_k + (thread_square_k * StripmineSquaresK); - - // Iterate through ldg_vector_t in each row - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - // ldg_vector_t L-coordinate in block-wide tile (L-axis strip-mining of ldg_vector_t within block-tile) - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - // Iterate through squares in each ldg_vector_t - #pragma unroll - for (int ldgvec_dpvec_l = 0; ldgvec_dpvec_l < LdgVectorDpVectors; ++ldgvec_dpvec_l) - { - // Square L-coordinate in block-wide tile (L-axis raking of square-slices within ldg_vector_t) - int block_square_l = (block_ldgvec_l * LdgVectorDpVectors) + ldgvec_dpvec_l; - - // Assemble square of L-major dp_vector_t from stack of slices - sts_vector_t square; - - // Iterate through rows of dp_vector_t in each square - #pragma unroll - for (int square_dpvec = 0; square_dpvec < SquareDpVectors; ++square_dpvec) - { - square.buff[square_dpvec] = thread_tile[thread_square_k][square_dpvec][thread_ldgvec_l].buff[ldgvec_dpvec_l]; - } - - // Un-transpose square from L-major to K-major - transpose_dp_square(square.buff); - - // Store dp-square - 
square.store(&scratch_tile[block_square_k][block_square_l * SquareDpVectors]); - } - } - } - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/block_loader_crosswise.h b/cutlass/gemm/block_loader_crosswise.h deleted file mode 100644 index 13660e67..00000000 --- a/cutlass/gemm/block_loader_crosswise.h +++ /dev/null @@ -1,411 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Tile-loading abstraction for thread blocks - */ - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * block_loader (CrosswiseCopy specialization) - ******************************************************************************/ - -/** - * \brief A three-phase data loading abstraction (prefetch, commit, and - * advance) for iterating over ranges of block-wide matrix tiles. - * (CrosswiseCopy specialization) - * - * Each iteration sequence produces a KxL (height-by-width) block-wide tile of - * value_t in shared memory. The layout of the shared block-wide tile is - * a row-major (L-major) tiling of dp_vector_t items, which are themselves - * column-major (K-major) vectors of value_t. Its dimensions are: - * K = BlockDpVectorsK * (sizeof(dp_vector_t) / sizeof(value_t) - * L = BlockDpVectorsL - * - * The data is copied from a corresponding tile of global matrix data whose - * layout of value_t is K-major. This constitutes a CrosswiseCopy between - * the K-major global tile and the L-major shared tile. 
- * - * NB: The orientation of dp_vector_t components in shared memory is congruous - * with the global matrix data, so we can use dp_vector_t as the minimum - * granularity of data transfer without any intermediate {dis|re}assembly - * of its value_t components. However, the global and shared memory layouts - * of dp_vector_t items are cross-wise with respect to each other, so any - * further LDG-vectorization of dp_vector_t data requires intermediate - * disassembly into dp_vector_t components to be stored individually into - * the shared tile. - * - * NB: Consecutive threads within a block are mapped in K-major - * fashion down a first set of LDG-vectors of dp_vector_t within their global - * tile. Successive sets of LDG-vectors are then strip-mined as necessary - * across the L-axis. These discontiguous LDG-vectors comprise the thread's - * "slice" of the block-wide tile. - */ -template < - int BlockThreads, ///< Number of threads in each thread block (blockDim.x) - int BlockDpVectorsK, ///< Extent of block-wide tile in dp_vector_t along the K-axis (height) - int BlockDpVectorsL, ///< Extent of block-wide tile in dp_vector_t along the L-axis (width) - typename value_t, ///< Input matrix value type - int LeadingDimAlignBytes, ///< Byte alignment of input matrix leading dimension - bool AllowRaggedTiles, ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions - typename dp_vector_t> ///< Dot-product vector type along the K-axis -struct block_loader< - BlockThreads, - BlockDpVectorsK, - BlockDpVectorsL, - value_t, - LeadingDimAlignBytes, - AllowRaggedTiles, - dp_vector_t, - load_algorithm::CrosswiseCopy> ///< Algorithm for loading a shared tile of KxL matrix data (CrosswiseCopy specialization) -{ - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - enum - { - /// Number of value_t in a 
dp_vector_t - DpVectorItems = divide_assert::value, - - /// Number of dp_vector_t in a block-wide tile - BlockDpVectors = BlockDpVectorsK * BlockDpVectorsL, - - /// Number of dp_vector_t in a thread-tile - ThreadDpVectors = divide_assert::value, - }; - - /// Data movement type, coarsened by LeadingDimAlignBytes, capped by the - /// smaller of either ThreadDpVectors or BlockDpVectorsK - typedef io_vector< - dp_vector_t, - __NV_STD_MIN(ThreadDpVectors, BlockDpVectorsK), - LeadingDimAlignBytes> - ldg_vector_t; - - enum - { - /// Number of dp_vector_t per ldg_vector_t - LdgVectorDpVectors = ldg_vector_t::VectorItems, - - /// Number of value_t per ldg_vector_t - LdgVectorItems = LdgVectorDpVectors * DpVectorItems, - - - - /// Total number of ldg_vector_t within each block-wide tile - BlockLdgVectors = divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along K-axis - BlockLdgVectorsK = divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along L-axis - BlockLdgVectorsL = BlockDpVectorsL, - - - - /// Number of ldg_vector_t within each thread-tile - ThreadLdgVectors = divide_assert::value, - - /// Extent of the thread tile in ldg_vector_t along K-axis - ThreadLdgVectorsK = __NV_STD_MAX(1, (BlockLdgVectorsK / BlockThreads)), - - /// Extent of the thread tile in ldg_vector_t along L-axis - ThreadLdgVectorsL = divide_assert::value, - - - - /// Number of ldg_vector_t within each stripmine-tile - StripmineLdgVectors = BlockThreads, - - /// Extent of the stripmine tile in ldg_vector_t along K-axis - StripmineLdgVectorsK = __NV_STD_MIN(BlockLdgVectorsK, StripmineLdgVectors), - - /// Extent of the stripmine tile in ldg_vector_t along L-axis - StripmineLdgVectorsL = divide_assert::value, - - - - /// Alignment in dp_vector_t along L needed for committing prefetch - AlignmentDpVectorsL = 1, - }; - - /// Predicate bit vector - typedef uint64_t predicate_mask_t; - - - 
//------------------------------------------------------------------------- - // Assert assumptions - //------------------------------------------------------------------------- - - static_assert( - (ThreadLdgVectors <= sizeof(predicate_mask_t) * 8), - "Predicate mask type does not contain enough bits for encoding load predicates"); - - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Input pointer to matrix in ldg_vector_t - ldg_vector_t *d_matrix_ldgvecs; - - /// Extent of the input matrix in ldg_vector_t along the L-axis - int matrix_ldgvecs_l; - - /// Thread block's ending ldg_vector_t coordinate (k) within the input matrix (one-past) - int block_end_ldgvec_k; - - /// Predicate bits for guarding ldg_vector_t loads within "whole-k" block-wide tiles - predicate_mask_t guard; - - /// Predicate bits for guarding ldg_vector_t loads within the final block-wide "residue" tile - predicate_mask_t residue_guard; - - /// Iteration span in "whole-k" block-wide tiles - int wholek_tiles_remaining; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_ldgvec_stride_k; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the L-axis - int matrix_ldgvec_stride_l; - - /// ldg_vector_t coordinates (l, k) of thread-tile within the block-wide tile - int2 block_thread_ldgvec_coords; - - /// Thread-wide tile of prefetch data - ldg_vector_t thread_tile[ThreadLdgVectorsK][ThreadLdgVectorsL]; - - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_loader( - value_t *d_matrix_items, ///< Input pointer to matrix in value_t - int matrix_items_l, ///< Extent of the input matrix in 
value_t along the L-axis - int matrix_items_stride_k, ///< Distance in value_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_items_stride_l, ///< Distance in value_t within pitched-linear memory between successive coordinates along the L-axis - int2 matrix_block_item_coords, ///< value_t coordinates (l, k) of first block-wide tile within the input matrix - int block_end_item_k) ///< Thread block's ending coordinate (k) within the input matrix (one-past) - : - block_end_ldgvec_k(block_end_item_k), - guard(0), - residue_guard(0) - { - matrix_ldgvecs_l = matrix_items_l; - matrix_ldgvec_stride_k = matrix_items_stride_k; - matrix_ldgvec_stride_l = (matrix_items_stride_l / LdgVectorItems); - - // ldg_vector_t coordinates (l, k) of thread-tile within the block-wide tile - block_thread_ldgvec_coords = make_int2( - (threadIdx.x / BlockLdgVectorsK), // l-coordinate - (threadIdx.x % BlockLdgVectorsK)); // k-coordinate - - // ldg_vector_t coordinates (l, k) of first block-wide tile within the input matrix - int2 matrix_block_ldgvec_coords = make_int2( - matrix_block_item_coords.x, // l-coordinate - matrix_block_item_coords.y / LdgVectorItems); // k-coordinate - - // Iteration span in ldg_vector_t - int span_ldgvec_k = (block_end_item_k - matrix_block_item_coords.y) / LdgVectorItems; - - - - // ldg_vector_t coordinates (l, k) of first thread-tile tile within the input matrix - int2 matrix_thread_ldgvec_coords = make_int2( - block_thread_ldgvec_coords.x + matrix_block_ldgvec_coords.x, - block_thread_ldgvec_coords.y + matrix_block_ldgvec_coords.y); - - // Iteration range in "whole-k" block-wide tiles - wholek_tiles_remaining = span_ldgvec_k / BlockLdgVectorsK; - - // Extent of final residue-tile in ldg_vector_t along K-axis - int residue_ldgvecs_k = span_ldgvec_k % BlockLdgVectorsK; - - // Initialize I/O predicates - if (AllowRaggedTiles) - { - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int 
thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - int block_ldgvec_k = block_thread_ldgvec_coords.y + (thread_ldgvec_k * StripmineLdgVectorsK); - - // Whether block_ldgvec_coords.y is valid in the final residue tile - predicate_mask_t valid_k = (block_ldgvec_k < residue_ldgvecs_k); - - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - // Whether block_ldgvec_coords.x is valid any block-wide tile - predicate_mask_t valid_l = (matrix_block_ldgvec_coords.x + block_ldgvec_l < matrix_ldgvecs_l); - - // Linear index of ldg_vector_t load - int ldgvec_idx = thread_ldgvec_l + (thread_ldgvec_k * ThreadLdgVectorsL); - - // Set predicate guard bits - guard |= (valid_l << ldgvec_idx); - residue_guard |= ((valid_l & valid_k) << ldgvec_idx); - } - } - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - - // Update the input pointer to be matrix_thread_ldgvec_coords - this->d_matrix_ldgvecs = - reinterpret_cast(d_matrix_items) + - (matrix_thread_ldgvec_coords.y * matrix_ldgvec_stride_k) + - (matrix_thread_ldgvec_coords.x * matrix_ldgvec_stride_l); - } - - - //------------------------------------------------------------------------- - // Loader API - //------------------------------------------------------------------------- - - /** - * Request the current block-wide tile - */ - inline __device__ - void request() - { - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - // Linear index 
of ldg_vector_t load - int ldgvec_idx = (thread_ldgvec_k * ThreadLdgVectorsL) + thread_ldgvec_l; - - // Unpack predicate guard - predicate_mask_t valid = ((guard >> ldgvec_idx) & 1); - - if (!AllowRaggedTiles || valid) - { - // Perform load - thread_tile[thread_ldgvec_k][thread_ldgvec_l].load( - d_matrix_ldgvecs + - (thread_ldgvec_k * StripmineLdgVectorsK * matrix_ldgvec_stride_k) + - (thread_ldgvec_l * StripmineLdgVectorsL * matrix_ldgvec_stride_l)); - } - else - { - // Zero-initialize - #pragma unroll - for (int dpvec = 0; dpvec < LdgVectorDpVectors; ++dpvec) - thread_tile[thread_ldgvec_k][thread_ldgvec_l].buff[dpvec] = 0; - } - } - } - } - - - /** - * Advance the loader to the next block-wide tile in the K-axis - */ - inline __device__ - void next() - { - d_matrix_ldgvecs += (matrix_ldgvec_stride_k * BlockLdgVectorsK); - - if (AllowRaggedTiles) - { - --wholek_tiles_remaining; - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - } - - - /** - * Commit the previously-requested block-wide tile to shared memory - * - * NB: To facilitate padding for avoiding shared memory bank conflicts, we - * allow the row stride SmemDpVectorsL to be arbitrarily bigger than the - * tile width BlockDpVectorsL. 
- */ - template - inline __device__ - void commit( - dp_vector_t (&scratch_tile)[BlockDpVectorsK][SmemDpVectorsL]) - { - static_assert(SmemDpVectorsL >= BlockDpVectorsL, "Row stride must be >= tile width."); - - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - int block_ldgvec_k = block_thread_ldgvec_coords.y + (thread_ldgvec_k * StripmineLdgVectorsK); - - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - // Write column of dp_vector_t - #pragma unroll - for (int dpvec = 0; dpvec < LdgVectorDpVectors; ++dpvec) - { - scratch_tile[(block_ldgvec_k * LdgVectorDpVectors) + dpvec][block_ldgvec_l] = - thread_tile[thread_ldgvec_k][thread_ldgvec_l].buff[dpvec]; - } - } - } - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/block_loader_wmma.h b/cutlass/gemm/block_loader_wmma.h deleted file mode 100644 index 5b586a1b..00000000 --- a/cutlass/gemm/block_loader_wmma.h +++ /dev/null @@ -1,322 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Tile-loading abstraction for thread blocks - */ - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/** - * block-wide tile loader supporting congruous mapping of data from source and - * destination addressable storage. Typically, this will be used to load a - * block-wide tile from global memory into shared memory. - * - * This enables the caller to specify MatrixAlignBytes guarantees of the input pointer - * and performs memory operations on vectors. This increases the efficiency of - * memory operations and reduces the number of guard predicates needed. 
- * - */ -template < - bool congruous, ///< Indicates whether the "GEMM K" dimension refers to strided matrix dimension - int BlockThreads, ///< Number of threads participating in the streaming operation - int BlockItemsL, ///< Extent of block-wide tile in value_t along the L-axis (width) - int BlockItemsK, ///< Extent of block-wide tile in value_t along the K-axis (height) - typename value_t, ///< Input matrix value type - int MatrixAlignBytes, ///< Byte alignment of input matrix - bool AllowRaggedTiles ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions -> -struct block_loader_wmma -{ - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - /// Predicate bit vector - typedef uint64_t predicate_mask_t; - - /// Data movement type, coarsened by MatrixAlignBytes - typedef io_vector< - value_t, - divide_assert::value, - MatrixAlignBytes> - ldg_vector_t; - - enum - { - /// Number of items per ldg_vector_t - LdgVectorItems = ldg_vector_t::VectorItems, - - /// Total number of ldg_vector_t within the block-wide tile - BlockLdgVectors = divide_assert<(BlockItemsL * BlockItemsK), LdgVectorItems>::value, - - /// Extent of the block-wide tile in ldg_vector_t along K-axis - BlockLdgVectorsK = BlockItemsK, - - /// Extent of the block-wide tile in ldg_vector_t along L-axis - BlockLdgVectorsL = divide_assert::value, - - /// Number of ldg_vector_t within each thread tile - ThreadLdgVectors = divide_assert::value, - - /// Extent of the thread tile in ldg_vector_t along the L-axis - ThreadLdgVectorsL = __NV_STD_MAX(1, BlockLdgVectorsL / BlockThreads), - - /// Block-wide strip-mining distance between ldg_vector_t along the K-axis - BlockLdgVectorStrideK = __NV_STD_MAX(1, BlockThreads / BlockLdgVectorsL), - - /// Extent of the thread tile in ldg_vector_t along the K-axis - ThreadLdgVectorsK = 
divide_assert::value, - }; - - - //------------------------------------------------------------------------- - // Assert assumptions - //------------------------------------------------------------------------- - - /// Define assertions - static_assert(ThreadLdgVectorsL * ThreadLdgVectorsK == ThreadLdgVectors, - "Number of vectors must be fully covered by the thread's 2D vector tile."); - - /// Predicate masks must be large enough to guard every vector load - static_assert(sizeof(predicate_mask_t) * 8 >= ThreadLdgVectorsL * ThreadLdgVectorsK, - "Predicate bit vector must be large enough to guard every vector load."); - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// pointer to tile in global memory - const ldg_vector_t *ptr; - - /// stride of the matrix in the K-axis - int matrix_values_stride_k; - - /// Guard predicate - predicate_mask_t guard; - - /// Guard for the last request iteration - predicate_mask_t residue_guard; - - /// Number of 'whole' request iterations before encountering the residue - int request_iterations; - - /// fetch registers - ldg_vector_t fetch[ThreadLdgVectors]; - - /// Thread's base offset from the start of a block-wide tile - int thread_offset_l; - - /// Thread's basae offset from the start of a block-wide tile - int thread_offset_k; - - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_loader_wmma( - const value_t *d_matrix, ///< Pointer to input matrix - int matrix_values_l, ///< Extent of the input matrix in value_t along the L-axis - int start_l, ///< Starting location in tile - int dim_k, ///< Inner dimension of tile, used for computing guard predicates - int _matrix_values_stride_k, ///< Stride of K-axis of atrix - int start_k, ///< 
Tile's starting location - int2 block_begin_item_coords) ///< Thread block's starting value_t coordinates (l, k) within the input matrix - : - ptr(reinterpret_cast(d_matrix)), - matrix_values_stride_k(_matrix_values_stride_k / LdgVectorItems), - guard(0), - residue_guard(0) - { - // Compute block's starting coordinates in units of vectors - int block_base_l = block_begin_item_coords.x / LdgVectorItems; - int block_base_k = block_begin_item_coords.y; - - // Compute a thread tiling of the block-wide tile - int tid = threadIdx.x; - thread_offset_l = tid % BlockLdgVectorsL; - thread_offset_k = tid / BlockLdgVectorsL; - - // Add the block and thread offsets to the source pointer - ptr += (block_base_l + thread_offset_l) + - (block_base_k + thread_offset_k) * matrix_values_stride_k; - - // When AllowRaggedTiles support is enabled, compute a bit vector of guard - // predicates - if (AllowRaggedTiles) - { - if (congruous) - { - request_iterations = (dim_k - start_k) / BlockItemsK; - } - else - { - request_iterations = (matrix_values_l - start_l) / BlockItemsL; - } - - #pragma unroll - for (int k_idx = 0; k_idx < ThreadLdgVectorsK; ++k_idx) - { - #pragma unroll - for (int l_idx = 0; l_idx < ThreadLdgVectorsL; ++l_idx) - { - int item = l_idx + k_idx * ThreadLdgVectorsL; - - // Global vector L and K indices - int vec_l = l_idx * BlockThreads; - int vec_k = k_idx * BlockLdgVectorStrideK; - - predicate_mask_t pred; - predicate_mask_t residue_pred; - - if (congruous) - { - pred = (((block_base_l + thread_offset_l + vec_l) * LdgVectorItems < matrix_values_l) ? 1 : 0); - residue_pred = ((block_base_k + thread_offset_k + vec_k < (dim_k % BlockItemsK)) ? 1 : 0); - } - else - { - pred = ((block_base_k + thread_offset_k + vec_k < dim_k) ? 1 : 0); - residue_pred = (((block_base_l + thread_offset_l + vec_l) * LdgVectorItems < (matrix_values_l % BlockItemsL)) ? 
1 : 0); - } - - // Update the guard and residue_guard word with predicate bits - guard |= (pred << item); - residue_guard |= (residue_pred << item); - } - } - - // If there are zero full request iterations, compute the intersection - // with the residue guard. - if (!request_iterations) - { - guard &= residue_guard; - } - } - } - - - - /** - * Request the current block-wide tile from source memory - */ - inline __device__ - void request() - { - #pragma unroll - for (int k_idx = 0; k_idx < ThreadLdgVectorsK; ++k_idx) - { - #pragma unroll - for (int l_idx = 0; l_idx < ThreadLdgVectorsL; ++l_idx) - { - int load_idx = l_idx + (k_idx * ThreadLdgVectorsL); - bool pred = !AllowRaggedTiles || (guard & (predicate_mask_t(1) << load_idx)); - if (pred) - { - fetch[load_idx].load( - ptr + - (k_idx * BlockLdgVectorStrideK * matrix_values_stride_k) + (l_idx * BlockThreads)); - } - else - { - #pragma unroll - for (int elem_idx = 0; elem_idx < LdgVectorItems; ++elem_idx) - { - fetch[load_idx].buff[elem_idx] = 0; - } - } - } - } - } - - - /// Advance to the next block-wide tile - inline __device__ - void next() - { - if (congruous) - { - ptr += BlockItemsK * matrix_values_stride_k; - } - else - { - ptr += BlockLdgVectorsL; - } - - // Track number of full iterations to intersect with the residue guard predicates. - if (AllowRaggedTiles) - { - --request_iterations; - if (!request_iterations) - { - guard &= residue_guard; - } - } - } - - - /// Commit the values to the scratch tile to destination memory. 
- template - inline __device__ - void commit(value_t *scratch_tile) - { - static_assert(SmemStride % LdgVectorItems == 0, - "SMEM stride must be divisible by the size of vector loads"); - - ldg_vector_t *smem_ptr = reinterpret_cast(scratch_tile); - smem_ptr += thread_offset_l + thread_offset_k * SmemStride / LdgVectorItems; - - #pragma unroll - for (int k_idx = 0; k_idx < ThreadLdgVectorsK; ++k_idx) - { - #pragma unroll - for (int l_idx = 0; l_idx < ThreadLdgVectorsL; ++l_idx) - { - int load_idx = l_idx + (k_idx * ThreadLdgVectorsL); - - fetch[load_idx].store(smem_ptr + - (k_idx * BlockLdgVectorStrideK * SmemStride / LdgVectorItems) + - (l_idx * BlockThreads)); - } - } - } -}; - - -} // namespace gemm -} // namespace cutlass - diff --git a/cutlass/gemm/block_task.h b/cutlass/gemm/block_task.h deleted file mode 100644 index 3940fb6e..00000000 --- a/cutlass/gemm/block_task.h +++ /dev/null @@ -1,677 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * A block-wide task abstraction for computing device-wide GEMM - */ - -#include - -#include "../util/util.h" - -#include "grid_raster.h" -#include "block_loader.h" -#include "k_split_control.h" -#include "thread_accumulator.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * block_task_policy - ******************************************************************************/ - -/** - * \brief Parameterizable tuning policy for \p block_task - * - * Once parameterized, \p block_task_policy provides the member constant - * \p BlockThreads indicating to the required thread block size - */ -template < - int _BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int _BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - int _BlockItemsK, ///< Extent of block-wide A|B tiles in value_t along the K-axis - int _ThreadItemsY, ///< Height in rows of a thread tile in C - int _ThreadItemsX, ///< Width in columns of a thread tile in C - bool _UseDoubleScratchTiles, ///< Whether to halve 
synchronization overhead at the expense of doubled shared memory and addressing overhead - grid_raster_strategy::kind_t _RasterStrategy> ///< Strategy for enumerating \p block_task within an input matrix -struct block_task_policy -{ - enum - { - /// Height in rows of a block-wide tile in matrix C - BlockItemsY = _BlockItemsY, - - /// Width in columns of a block-wide tile in matrix C - BlockItemsX = _BlockItemsX, - - /// Height in rows of a thread tile in C - ThreadItemsY = _ThreadItemsY, - - /// Width in columns of a thread tile in C - ThreadItemsX = _ThreadItemsX, - - /// Extent of block-wide A|B tiles in value_t along the K-axis - BlockItemsK = _BlockItemsK, - - /// Whether to halve synchronization overhead at the expense of doubled shared memory and addressing overhead - UseDoubleScratchTiles = _UseDoubleScratchTiles, - - /// Number of threads in each thread block (blockDim.x) - BlockThreads = divide_assert< - (BlockItemsY * BlockItemsX), - (ThreadItemsY * ThreadItemsX)>::value, - }; - - /// Strategy for enumerating \p block_task within an input matrix - static const grid_raster_strategy::kind_t RasterStrategy = _RasterStrategy; -}; - - -/****************************************************************************** - * block_task - ******************************************************************************/ - -/** - * \brief A block-wide task abstraction for computing device-wide GEMM - * - * Each thread_block is assigned a unique tile of output matrix C to compute by - * consuming the corresponding stripes of the input matrices A and B. 
- */ -template < - typename block_task_policy_t, ///< Parameterization of block_task_policy - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - int LdgAlignA, ///< Alignment (in bytes) for A operand - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - int LdgAlignB, ///< Alignment (in bytes) for B operand - typename epilogue_op_t, ///< Epilogue operation applied to GEMM - int LdgAlignC, ///< Alignment (in bytes) for C operand - bool AllowRaggedTiles ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions -> -struct block_task -{ - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - enum - { - /// Number of threads in each thread block (blockDim.x) - BlockThreads = block_task_policy_t::BlockThreads, - - /// Extent of thread tile in value_t along M-axis - ThreadItemsY = block_task_policy_t::ThreadItemsY, - - /// Extent of thread tile in value_t along N-axis - ThreadItemsX = block_task_policy_t::ThreadItemsX, - }; - - /// Accumulator type - typedef thread_accumulator< - ThreadItemsY, - ThreadItemsX, - value_t, - accum_t> - thread_accumulator_t; - - /// Dot-product vector type along the K-axis (e.g, uchar4 when using IDP4A) - typedef typename thread_accumulator_t::dp_vector_t dp_vector_t; - - enum - { - /// Whether this is a small, latency-bound tile - IsSmallTile = (ThreadItemsY < 4) && (ThreadItemsX < 4), - - /// Number of value_t in dp_vector_t - DpVectorItems = divide_assert::value, - - /// Extent of block-wide C-tile in accum_t (and A-tiles in value_t) along M-axis (height) - BlockItemsY = block_task_policy_t::BlockItemsY, - - /// Extent of block-wide C-tile in accum_t (and B-tiles in 
value_t) along N-axis (width) - BlockItemsX = block_task_policy_t::BlockItemsX, - - /// Extent of block-wide A|B tiles in value_t along the K-axis - BlockItemsK = block_task_policy_t::BlockItemsK, - - /// Whether to halve synchronization overhead at the expense of doubled shared memory and addressing overhead - UseDoubleScratchTiles = block_task_policy_t::UseDoubleScratchTiles, - - /// Extent of block-wide A|B tiles in dp_vector_t along the K-axis - BlockDpVectorsK = divide_assert::value, - - /// Number of dp_vector_t along M-axis that can be read in a single LDS from the shared A-tile (up to 128b if more than one value_t) - LdsVectorDpVectorsA = __NV_STD_MIN( - ThreadItemsY, - __NV_STD_MAX(1, (128 / (__NV_STD_MAX(sizeof(dp_vector_t), sizeof(accum_t)) * 8)))), - - /// Number of dp_vector_t along N-axis that can be read in a single LDS from the shared B-tile (up to 128b if more than one value_t) - LdsVectorDpVectorsB = __NV_STD_MIN( - ThreadItemsX, - __NV_STD_MAX(1, (128 / (__NV_STD_MAX(sizeof(dp_vector_t), sizeof(accum_t)) * 8)))), - - /// Number of strip-mined LDS vector reads from shared A-tile - ThreadLdsVectorsA = divide_assert::value, - - /// Number of strip-mined LDS vector reads from shared B-tile - ThreadLdsVectorsB = divide_assert::value, - - /// Number of elements in one LDG/STG vector of C-tile - ThreadLdgVectorSizeC = __NV_STD_MIN(LdgAlignC, 16) / (sizeof(accum_t)), - - /// Number of threads in warp - WarpThreads = 32, - - /// Extent of warp in threads along the M-axis - WarpThreadsY = (BlockItemsY > BlockItemsX) ? 
8 : 4, - - /// Extent of warp in threads along the N-axis - WarpThreadsX = divide_assert::value, - - /// Extent of warp-wide tile in items along the M-axis - WarpItemsY = WarpThreadsY * ThreadItemsY, - - /// Extent of warp-wide tile in items along the N-axis - WarpItemsX = WarpThreadsX * ThreadItemsX, - - /// Extent of block in warps along M-axis - BlockWarpsY = divide_assert::value, - - /// Extent of block in warps along N-axis - BlockWarpsX = divide_assert::value, - }; - - /// Load-from-shared data movement type for A-tile, coarsened by LdsVectorDpVectorsA - typedef io_vector lds_vector_a_t; - - /// Load-from-shared data movement type for B-tile, coarsened by LdsVectorDpVectorsB - typedef io_vector lds_vector_b_t; - - /// Thread block rasterization helper type - typedef grid_raster< - BlockItemsY, - BlockItemsX, - TransformA, - TransformB, - block_task_policy_t::RasterStrategy> - grid_raster_t; - - - /// Tile loader type for matrix A - typedef block_loader< - BlockThreads, // BlockThreads - BlockDpVectorsK, // BlockDpVectorsK - BlockItemsY, // BlockItemsL - value_t, // value_t - LdgAlignA, // MatrixAlignBytes - AllowRaggedTiles, // AllowRaggedTiles - dp_vector_t, // dp_vector_t - (TransformA == matrix_transform_t::NonTranspose) ? // LoadAlgorithm - load_algorithm::CongruousCopy : - load_algorithm::CrosswiseCopy> - block_loader_a_t; - - - /// Tile loader type for matrix B - typedef block_loader< - BlockThreads, // BlockThreads - BlockDpVectorsK, // BlockDpVectorsK - BlockItemsX, // BlockItemsL - value_t, // value_t - LdgAlignB, // MatrixAlignBytes - AllowRaggedTiles, // AllowRaggedTiles - dp_vector_t, // dp_vector_t - (TransformB == matrix_transform_t::NonTranspose) ? // LoadAlgorithm - load_algorithm::CrosswiseCopy : - load_algorithm::CongruousCopy> - block_loader_b_t; - - - enum - { - /// Number of value_t to pad the end of each row of the shared A-tile - PadItemsA = (TransformA == matrix_transform_t::NonTranspose) ? 
- __NV_STD_MAX(LdsVectorDpVectorsA, block_loader_a_t::AlignmentDpVectorsL) : - LdsVectorDpVectorsA, - - /// Number of value_t to pad the end of each row of the shared B-tile - PadItemsB = (TransformB == matrix_transform_t::NonTranspose) ? - LdsVectorDpVectorsB : - __NV_STD_MAX(LdsVectorDpVectorsB, block_loader_b_t::AlignmentDpVectorsL), - }; - - - /// Shared memory layout for a prefetch page - struct page_storage_t - { - /// Tile of A - dp_vector_t __align__(16) block_a[BlockDpVectorsK][BlockItemsY + PadItemsA]; - - /// Tile of B - dp_vector_t __align__(16) block_b[BlockDpVectorsK][BlockItemsX + PadItemsB]; - }; - - - /// Shared memory layout for scratch storage - struct scratch_storage_t - { - /// Prefetch pages - page_storage_t pages[UseDoubleScratchTiles ? 2 : 1]; - - /// Accumulator shared scratch - typename thread_accumulator_t::scratch_storage_t accum_scratch; - }; - - - //------------------------------------------------------------------------- - // Assert assumptions - //------------------------------------------------------------------------- - - // Ensure we have at least two unrolled innermost loop iterations (one to prefetch - // the next global tile and then one to prefetch the first strip of it from shared) - static_assert ((BlockDpVectorsK >= 2), "BlockDpVectorsK must be >= 2."); - - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Scratch storage reference - scratch_storage_t *scratch; - - /// Which page of scratch tiles we're currently reading from - int page_idx; - - /// Pointer to matrix C - accum_t *d_c; - - /// Epilogue operation applied to update matrix C - epilogue_op_t epilogue_op; - - /// Matrix height in rows of trans_op(A) and C - int dim_m; - - /// Matrix width in columns of trans_op(B) and C - int dim_n; - - /// Control for inter-block k-splitting - k_split_control k_split; - - /// Thread block's base value_t 
coordinates (m, n) in matrix C - grid_raster_t grid_raster; - - /// Thread block's current coordinate (k) within A|B matrices - int block_item_coords_k; - - /// Thread block's ending coordinate (k) within A|B matrices (one-past) - int block_end_item_k; - - /// Warp's coordinates (x, y) in thread block - int2 block_warp_coords; - - /// Thread's coordinates (x, y) in warp - int2 warp_thread_coords; - - /// Thread's base item offset within strip of A tile - int thread_strip_offset_a; - - /// Thread's base item offset within strip of B tile - int thread_strip_offset_b; - - /// Thread's active-k/prefetch-k slices from shared A tile - lds_vector_a_t local_slices_a[2][ThreadLdsVectorsA]; - - /// Thread's active-k/prefetch-k slices from shared B tile - lds_vector_b_t local_slices_b[2][ThreadLdsVectorsB]; - - /// A tile loader - block_loader_a_t loader_a; - - /// B tile loader - block_loader_b_t loader_b; - - /// C tile accumulator - thread_accumulator_t accumulator; - - - //------------------------------------------------------------------------- - // Coordinate system helpers - //------------------------------------------------------------------------- - - /// Compute the warp's coordinates (x, y) in thread block - inline __device__ - int2 warp_coords() - { - int warp_id = threadIdx.x / WarpThreads; - return make_int2( - warp_id % BlockWarpsX, - warp_id / BlockWarpsX); - } - - - /// Compute the thread's lane-coordinates (x, y) in warp - inline __device__ - int2 thread_coords() - { - int lane_id = threadIdx.x % WarpThreads; - - // Maxwell+ mapping of threads within a 2D warp for maximal LDS bandwidth - return make_int2( - lane_id / WarpThreadsY, - lane_id % WarpThreadsY); - } - - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_task( - scratch_storage_t *scratch, - value_t *d_a, - value_t *d_b, - 
accum_t *d_c, - epilogue_op_t epilogue_op, - int dim_m, - int dim_n, - int dim_k, - k_split_control k_split) - : - scratch(scratch), - page_idx(0), - d_c(d_c), - epilogue_op(epilogue_op), - dim_m(dim_m), - dim_n(dim_n), - k_split(k_split), - block_item_coords_k(k_split.block_begin_item_k()), - block_end_item_k(k_split.block_end_item_k(dim_k)), - block_warp_coords(warp_coords()), - warp_thread_coords(thread_coords()), - thread_strip_offset_a((warp_thread_coords.y * LdsVectorDpVectorsA) + (block_warp_coords.y * WarpItemsY)), - thread_strip_offset_b((warp_thread_coords.x * LdsVectorDpVectorsB) + (block_warp_coords.x * WarpItemsX)), - - loader_a( - d_a, // d_matrix - dim_m, // matrix_values_l - (TransformA == matrix_transform_t::NonTranspose) ? dim_m : 1, // matrix_values_stride_k - (TransformA == matrix_transform_t::NonTranspose) ? 1 : dim_k, // matrix_values_stride_l - make_int2( // block_begin_item_coords - grid_raster.block_item_coords.y, - block_item_coords_k), - block_end_item_k), // block_end_item_k - - loader_b( - d_b, // d_matrix - dim_n, // matrix_values_l - (TransformB == matrix_transform_t::NonTranspose) ? 1 : dim_n, // matrix_values_stride_k - (TransformB == matrix_transform_t::NonTranspose) ? 
dim_k : 1, // matrix_values_stride_l - make_int2( // block_begin_item_coords - grid_raster.block_item_coords.x, - block_item_coords_k), - block_end_item_k), // block_end_item_k - - accumulator(scratch->accum_scratch) - {} - - - //------------------------------------------------------------------------- - // Prefetching utility methods - //------------------------------------------------------------------------- - - /** - * Request the calling thread's slices of the shared tiles at depth \p tile_offset_k - */ - inline __device__ void request_local_prefetch( - lds_vector_a_t (&slice_a)[ThreadLdsVectorsA], ///< Slice from A - lds_vector_b_t (&slice_b)[ThreadLdsVectorsB], ///< Slice from B - int tile_offset_k) - { - // Load B strip - for (int i = 0; i < ThreadLdsVectorsB; ++i) - { - slice_b[i].load( - &scratch->pages[page_idx].block_b[tile_offset_k][thread_strip_offset_b + (i * WarpThreadsX * LdsVectorDpVectorsB)]); - } - - // Load A strip - for (int i = 0; i < ThreadLdsVectorsA; ++i) - { - slice_a[i].load( - &scratch->pages[page_idx].block_a[tile_offset_k][thread_strip_offset_a + (i * WarpThreadsY * LdsVectorDpVectorsA)]); - } - } - - - //------------------------------------------------------------------------- - // Epilogue - //------------------------------------------------------------------------- - - /** - * Performs the GEMM epilogue: - * - Applies the scalar multipliers and addends to the accumulators - * - Write the result to the output matrix - */ - __forceinline__ __device__ - void epilogue() - { - // Wait for predecessor thread block(s) to produce block-wide tile of - // exclsuive partial-sums - k_split.wait(); - - // Configure epilogue as to whether the thread block is a secondary - // accumulator in an inter-block k-splitting scheme - if (k_split.is_secondary_accumulator()) - epilogue_op.set_secondary_accumulator(); - - // Whether the addend from C needs loading - bool must_init_addend = epilogue_op.must_init_addend(); - - #pragma unroll - for (int x = 0; 
x < ThreadItemsX; ++x) - { - #pragma unroll - for (int y = 0; y < ThreadItemsY; y += LdsVectorDpVectorsA) - { - int thread_strip_b = x / LdsVectorDpVectorsB; - int thread_strip_a = y / LdsVectorDpVectorsA; - - int thread_item_coords_tile_x = thread_strip_offset_b + (thread_strip_b * WarpThreadsX * LdsVectorDpVectorsB) + (x % LdsVectorDpVectorsB); - int thread_item_coords_tile_y = thread_strip_offset_a + (thread_strip_a * WarpThreadsY * LdsVectorDpVectorsA) + (y % LdsVectorDpVectorsA); - - int c_idx = (grid_raster.block_item_coords.x + thread_item_coords_tile_x) * dim_m + - grid_raster.block_item_coords.y + thread_item_coords_tile_y; - - accum_t *my_c = d_c + c_idx; - - #pragma unroll - for (int i = 0; i < LdsVectorDpVectorsA; ++i) - { - accum_t c_slice = accum_t(0); - accum_t *c_ptr = my_c + i; - - if ((grid_raster.block_item_coords.x + thread_item_coords_tile_x) < dim_n && - (grid_raster.block_item_coords.y + thread_item_coords_tile_y + i) < dim_m) - { - if (must_init_addend) - { - ldg_cg(c_slice, c_ptr); - } - - c_slice = epilogue_op(accumulator.get(x, y + i), c_slice, c_idx + i); - - stg_cg(c_ptr, c_slice); - } - } - } - } - - // Signal k-split successor thread_block that we have produced our block-wide - // tile of inclusive partial-sums - k_split.signal(); - } - - - //------------------------------------------------------------------------- - // Tile consumption - //------------------------------------------------------------------------- - - /** - * Consume a tile of A and B each - */ - template - __forceinline__ __device__ - void consume_tile() - { - // Unroll BlockDpVectorsK iterations of outer-product accumulations - #pragma unroll - for (int tile_offset_k = 0; tile_offset_k < BlockDpVectorsK; tile_offset_k += 1) - { - // Last strip commits global prefetch for next tile - if ((tile_offset_k == BlockDpVectorsK - 1) && DoGlobalPrefetch) - { - // If not using two pages of scratch tiles, protect the above prefetch loads from the committing writes below - if 
(!UseDoubleScratchTiles) - __syncthreads(); - - // If using two pages of scratch tiles, switch to next page before writing - if (UseDoubleScratchTiles) - { - page_idx = (page_idx ? 0 : 1); - } - - // Commit global prefetch data to scratch page - loader_a.commit(scratch->pages[page_idx].block_a); - loader_b.commit(scratch->pages[page_idx].block_b); - - __syncthreads(); - } - - // Request local prefetch for next strip - request_local_prefetch( - local_slices_a[(tile_offset_k + 1) % 2], - local_slices_b[(tile_offset_k + 1) % 2], - (tile_offset_k + 1) % BlockDpVectorsK); - - // Request global prefetch for next tile on first strip - if ((tile_offset_k == 0) && DoGlobalPrefetch) - { - loader_b.request(); - loader_b.next(); - loader_a.request(); - loader_a.next(); - } - - // Cast strip-mined loads to contiguous array of dp_vector_t - typedef dp_vector_t thread_tile_a_t[ThreadLdsVectorsA * LdsVectorDpVectorsA]; - typedef dp_vector_t thread_tile_b_t[ThreadLdsVectorsB * LdsVectorDpVectorsB]; - thread_tile_a_t &thread_tile_a = reinterpret_cast(local_slices_a[(tile_offset_k) % 2]); - thread_tile_b_t &thread_tile_b = reinterpret_cast(local_slices_b[(tile_offset_k) % 2]); - - // Accumulate this dp-stripe product - accumulator.multiply_accumulate(thread_tile_a, thread_tile_b); - } - } - - - //------------------------------------------------------------------------- - // GEMM API - //------------------------------------------------------------------------- - - /** - * Compute GEMM - */ - __forceinline__ __device__ - void run() - { - // Quit if the thread block is fully out-of-bounds - if (grid_raster.is_block_oob(dim_m, dim_n)) - { - asm volatile("exit;"); - } - - // Request global prefetch of first tile - loader_a.request(); - loader_a.next(); - loader_b.request(); - loader_b.next(); - - // Commit global prefetch of first tile to shared memory - loader_a.commit(scratch->pages[page_idx].block_a); - loader_b.commit(scratch->pages[page_idx].block_b); - - // Advance to next A,B tiles 
in K-axis - block_item_coords_k += BlockItemsK; - - // Synchronize shared tiles and prepared accumulator - __syncthreads(); - - // Initialize thread's slice of accumulators - accumulator.init(); - - // Request first iteration of local prefetch strips - request_local_prefetch( - local_slices_a[0], - local_slices_b[0], - 0); - - // - // Main loop - // - - // Consume tiles in A and B along the K-axis (all but last tile) - #pragma unroll 1 - while (block_item_coords_k < block_end_item_k) - { - consume_tile(); - - // Advance to next A,B tiles in K-axis - block_item_coords_k += BlockItemsK; - } - - // Consume last tile - consume_tile(); - - // - // Eplilogue - // - - epilogue(); - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/block_task_wmma.h b/cutlass/gemm/block_task_wmma.h deleted file mode 100644 index 03d2d222..00000000 --- a/cutlass/gemm/block_task_wmma.h +++ /dev/null @@ -1,767 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * A block-wide task abstraction for computing device-wide GEMM - */ - -#pragma once - -// Compiler guard conditional to avoid compilation errors on versions of CUDA that -// do not support the WMMA API. 
-#if defined (WMMA) - -#include - -#include "../util/util.h" - -#include "grid_raster.h" -#include "block_loader.h" -#include "block_loader_wmma.h" -#include "wmma_accumulator.h" - - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * block_task_wmma_policy - ******************************************************************************/ - -/** - * \brief Parameterizable tuning policy for block-wide WMMA GEMM tasks - * - * Once parameterized, \p block_task_policy provides the member constant - * \p BlockThreads indicating to the required thread block size - */ -template < - int _BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int _BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - int _BlockItemsK, ///< Extent of block-wide A|B tiles in value_t along the K-axis - int _WarpItemsY, ///< Height in rows of a Warp tile's accumulators - int _WarpItemsX, ///< Width in columns of a Warp tile's accumulators - int _WmmaItemsY, ///< Height in rows of a discrete WMMA block's accumulators - int _WmmaItemsX, ///< Width in columns of a discrete WMMA block's accumulators - int _WmmaItemsK, ///< Depth of each discrete WMMA block - bool _UseDoubleScratchTiles, ///< Whether to halve synchronization overhead at the expense of doubled shared memory and addressing overhead - grid_raster_strategy::kind_t _RasterStrategy> ///< Strategy for enumerating \p block_task within an input matrix -struct block_task_wmma_policy -{ - /// Strategy for enumerating \p block_task within an input matrix - static const grid_raster_strategy::kind_t RasterStrategy = _RasterStrategy; - - enum - { - /// Height in rows of a block-wide tile in matrix C - BlockItemsY = _BlockItemsY, - - /// Width in columns of a block-wide tile in matrix C - BlockItemsX = _BlockItemsX, - - /// Extent of block-wide A|B tiles in value_t along the K-axis - BlockItemsK = _BlockItemsK, - - /// Height in rows of a Warp 
tile's accumulators - WarpItemsX = _WarpItemsX, - - /// Width in columns of a Warp tile's accumulators - WarpItemsY = _WarpItemsY, - - /// Width in columns of a discrete WMMA block's accumulators - WmmaItemsX = _WmmaItemsX, - - /// Height in rows of a discrete WMMA block's accumulators - WmmaItemsY = _WmmaItemsY, - - /// Depth of each discrete WMMA block - WmmaItemsK = _WmmaItemsK, - - /// Whether to halve synchronization overhead at the expense of doubled shared memory and addressing overhead - UseDoubleScratchTiles = _UseDoubleScratchTiles, - - - // - // Derived quantities - // - - /// Machine warp size - WarpThreads = 32, - - /// Number of WMMA operations in the height dimension - WmmaBlocksY = divide_assert::value, - - /// Number of WMMA operations in the height dimension - WmmaBlocksX = divide_assert::value, - - /// Number of warps in each thread block - BlockWarps = divide_assert::value, - - /// Number of threads in each thread block (blockDim.x) - BlockThreads = BlockWarps * WarpThreads, - }; -}; - - -/****************************************************************************** - * block_task_wmma - ******************************************************************************/ - -/** - * \brief A block-wide task abstraction for computing device-wide GEMM - * - * Each thread_block is assigned a unique tile of output matrix C to compute by - * consuming the corresponding stripes of the input matrices A and B. 
- */ -template < - typename block_task_policy_t, ///< Parameterization of block_task_policy - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - int LdgAlignA, ///< Alignment (in bytes) for A operand - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - int LdgAlignB, ///< Alignment (in bytes) for B operand - typename epilogue_op_t, ///< Epilogue operation to update matrix C - int LdgAlignC, ///< Alignment (in bytes) for C operand - bool AllowRaggedTiles ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions -> -struct block_task_wmma -{ - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - enum - { - /// Number of threads in each thread block (blockDim.x) - BlockThreads = block_task_policy_t::BlockThreads, - - /// Extent of block-wide C-tile in accum_t (and A-tiles in value_t) along M-axis (height) - BlockItemsY = block_task_policy_t::BlockItemsY, - - /// Extent of block-wide C-tile in accum_t (and B-tiles in value_t) along N-axis (width) - BlockItemsX = block_task_policy_t::BlockItemsX, - - /// Extent of block-wide A|B tiles in value_t along the K-axis - BlockItemsK = block_task_policy_t::BlockItemsK, - - /// Extent of warp C-tile in accum_t (and A-tiles in value_t) along M-axis (height) - WarpItemsY = block_task_policy_t::WarpItemsY, - - /// Extent of warp C-tile in accum_t (and B-tiles in value_t) along N-axis (width) - WarpItemsX = block_task_policy_t::WarpItemsX, - - /// Extent of warp C-tile in accum_t (and A-tiles in value_t) along M-axis (height) - WmmaItemsY = block_task_policy_t::WmmaItemsY, - - /// Extent of warp C-tile in accum_t (and B-tiles in value_t) along N-axis 
(width) - WmmaItemsX = block_task_policy_t::WmmaItemsX, - - /// Extent of warp-wide A|B-tiles in value_t along K-axis - WmmaItemsK = block_task_policy_t::WmmaItemsK, - - /// Whether to halve synchronization overhead at the expense of doubled shared memory and addressing overhead - UseDoubleScratchTiles = block_task_policy_t::UseDoubleScratchTiles, - - /// Number of threads in warp - WarpThreads = block_task_policy_t::WarpThreads, - - /// Number of warps participating - BlockWarps = block_task_policy_t::BlockWarps, - - /// Extent of block in warps along M-axis - BlockWarpsY = divide_assert::value, - - /// Extent of block in warps along N-axis - BlockWarpsX = divide_assert::value, - - /// Number of MMA unrolls - WmmaUnrollCount = divide_assert::value, - - /// True if the A matrix layout is column major (K is the strided dimension) - IsLayoutCongruousA = (TransformA == matrix_transform_t::NonTranspose), - - /// True if the B matrix layout is row mayor (K is the strided dimension) - IsLayoutCongruousB = (TransformB == matrix_transform_t::Transpose), - - }; - - /// WMMA may support unique types for A and B, so plan ahead for this - typedef value_t value_a_t; - - /// WMMA may support unique types for A and B, so plan ahead for this - typedef value_t value_b_t; - - /// WMMA accumulator type - typedef wmma_accumulator< - WarpItemsY, - WarpItemsX, - WmmaItemsY, - WmmaItemsX, - WmmaItemsK, - value_a_t, - value_b_t, - accum_t, - TransformA, - TransformB> - accumulator_t; - - /// Thread block rasterization helper type - typedef grid_raster< - BlockItemsY, - BlockItemsX, - TransformA, - TransformB, - block_task_policy_t::RasterStrategy> - grid_raster_t; - - /// Tile loader type for matrix A - typedef block_loader_wmma< - IsLayoutCongruousA, - BlockThreads, - (IsLayoutCongruousA ? BlockItemsY : BlockItemsK), - (IsLayoutCongruousA ? 
BlockItemsK : BlockItemsY), - value_a_t, - LdgAlignA, - AllowRaggedTiles> - block_loader_a_t; - - /// Tile loader type for matrix A - typedef block_loader_wmma< - IsLayoutCongruousB, - BlockThreads, - (IsLayoutCongruousB ? BlockItemsX : BlockItemsK), - (IsLayoutCongruousB ? BlockItemsK : BlockItemsX), - value_b_t, - LdgAlignB, - AllowRaggedTiles> - block_loader_b_t; - - /// Type alias for matrix A fragment type - typedef typename accumulator_t::fragment_a_t fragment_a_t; - - /// Type alias for matrix B fragment type - typedef typename accumulator_t::fragment_b_t fragment_b_t; - - enum - { - /// Number of fragments from A matrix - WmmaBlocksY = accumulator_t::WmmaBlocksY, - - /// Number of fragments from B matrix - WmmaBlocksX = accumulator_t::WmmaBlocksX, - - /// Number of value_t to pad the outer dimension of the shared A-tile - PadItemsA = 16, - - /// Number of value_t to pad the outer dimension of the shared B-tile - PadItemsB = 16, - - /// Leading dimension of A matrix tile - LdmSmemA = (IsLayoutCongruousA ? BlockItemsY: BlockItemsK) + PadItemsA, - - /// Leading dimension of A matrix tile - StridedSmemA = (IsLayoutCongruousA ? BlockItemsK : BlockItemsY ), - - /// Leading dimension of B matrix tile - LdmSmemB = (IsLayoutCongruousB? BlockItemsX : BlockItemsK) + PadItemsB, - - StridedSmemB = (IsLayoutCongruousB ? BlockItemsK : BlockItemsX), - }; - - /// Shared memory layout for a prefetch page - struct page_storage_t - { - /// Tile of A - value_a_t __align__(16) block_a[StridedSmemA][LdmSmemA]; - - /// Tile of B - value_b_t __align__(16) block_b[StridedSmemB][LdmSmemB]; - }; - - /// Shared memory layout for scratch storage - struct scratch_storage_t - { - union - { - /// Prefetch pages - uninitialized pages[UseDoubleScratchTiles ? 
2 : 1]; - - /// Scratch storage for warps - accum_t epilogue[BlockWarps][WmmaItemsX * WmmaItemsY]; - }; - }; - - //------------------------------------------------------------------------- - // Assert assumptions - //------------------------------------------------------------------------- - - // Ensure we have at least two unrolled innermost loop iterations (one to prefetch - // the next global tile and then one to prefetch the first strip of it from shared) - static_assert ((BlockItemsK >= 2), "BlockItemsK must be >= 2."); - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Scratch storage reference - scratch_storage_t *scratch; - - /// Which page of scratch tiles we're currently reading from - int page_idx; - - /// Pointer to matrix C - accum_t *d_c; - - /// Epilogue operation applied to update matrix C - epilogue_op_t epilogue_op; - - /// Matrix height in rows of trans_op(A) and C - int dim_m; - - /// Matrix width in columns of trans_op(B) and C - int dim_n; - - /// Control for inter-block k-splitting - k_split_control k_split; - - /// Thread block's base value_t coordinates (m, n) in matrix C - grid_raster_t grid_raster; - - /// Thread block's current coordinate (k) within A|B matrices - int block_item_coords_k; - - /// Thread block's ending coordinate (k) within A|B matrices (one-past) - int block_end_item_k; - - /// Warp's coordinates (x, y) in thread block - int2 block_warp_item_coords; - - /// A tile loader - block_loader_a_t loader_a; - - /// B tile loader - block_loader_b_t loader_b; - - /// Thread's active-k/prefetch-k slices from shared A tile - fragment_a_t local_slices_a[2][WmmaBlocksY]; - - /// Thread's active-k/prefetch-k slices from shared B tile - fragment_b_t local_slices_b[2][WmmaBlocksX]; - - /// Accumulator tile - accumulator_t accumulator; - - - 
//------------------------------------------------------------------------- - // Coordinate system helpers - //------------------------------------------------------------------------- - - /// Compute the warp's item-coordinates (x, y) in thread block - inline __device__ - int2 warp_item_coords() - { - int warp_id = threadIdx.x / WarpThreads; - - return make_int2( - (warp_id / BlockWarpsY) * WarpItemsX, - (warp_id % BlockWarpsY) * WarpItemsY); - } - - /// Compute the thread block's base item-coordinates in matrix A - inline __device__ - int2 a_block_item_coords() - { - if (TransformA == matrix_transform_t::NonTranspose) - { - return make_int2(grid_raster.block_item_coords.y, block_item_coords_k); - } - else - { - return make_int2(block_item_coords_k, grid_raster.block_item_coords.y); - } - } - - /// Compute the thread block's base item-coordinates in matrix B - inline __device__ - int2 b_block_item_coords() - { - if (TransformB == matrix_transform_t::Transpose) - { - return make_int2(grid_raster.block_item_coords.x, block_item_coords_k); - } - else - { - return make_int2(block_item_coords_k, grid_raster.block_item_coords.x); - } - } - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_task_wmma( - scratch_storage_t *scratch, - value_t *d_a, - value_t *d_b, - accum_t *d_c, - epilogue_op_t epilogue_op, - int dim_m, - int dim_n, - int dim_k, - k_split_control k_split) - : - scratch(scratch), - page_idx(0), - d_c(d_c), - epilogue_op(epilogue_op), - dim_m(dim_m), - dim_n(dim_n), - k_split(k_split), - block_item_coords_k(k_split.block_begin_item_k()), - block_end_item_k(k_split.block_end_item_k(dim_k)), - block_warp_item_coords(warp_item_coords()), - - loader_a( - reinterpret_cast(d_a), - (IsLayoutCongruousA ? dim_m : block_end_item_k), - (IsLayoutCongruousA ? 
0 : block_item_coords_k), - (IsLayoutCongruousA ? block_end_item_k : dim_m), - (IsLayoutCongruousA ? dim_m : dim_k), - (IsLayoutCongruousA ? block_item_coords_k : 0), - a_block_item_coords()), - - loader_b( - reinterpret_cast(d_b), - (IsLayoutCongruousB ? dim_n : block_end_item_k), - (IsLayoutCongruousB ? 0 : block_item_coords_k), - (IsLayoutCongruousB ? block_end_item_k : dim_n), - (IsLayoutCongruousB ? dim_n : dim_k), - (IsLayoutCongruousB ? block_item_coords_k : 0), - b_block_item_coords()) - {} - - - //------------------------------------------------------------------------- - // Prefetching utility methods - //------------------------------------------------------------------------- - - /** - * Request the calling thread's slices of the shared tiles at depth \p tile_offset_k - */ - inline __device__ void request_local_prefetch( - fragment_a_t local_slices_a[WmmaBlocksY], ///< Slice from A - fragment_b_t local_slices_b[WmmaBlocksX], ///< Slice from B - int tile_offset_k) - { - value_b_t const *smem_A_base = &scratch->pages[page_idx].alias().block_a[0][0]; - value_b_t const *smem_B_base = &scratch->pages[page_idx].alias().block_b[0][0]; - - int constexpr kstride_a = (IsLayoutCongruousA ? LdmSmemA : 1); - int constexpr lstride_a = (IsLayoutCongruousA ? 1 : LdmSmemA); - - int constexpr kstride_b = (IsLayoutCongruousB ? LdmSmemB : 1); - int constexpr lstride_b = (IsLayoutCongruousB ? 
1 : LdmSmemB); - - // Load B strip - #pragma unroll - for (int i = 0; i < WmmaBlocksX; ++i) - { - value_b_t const *smem_B_ptr = - &smem_B_base[tile_offset_k * kstride_b + (block_warp_item_coords.x + WmmaItemsX * i) * lstride_b]; - - nvcuda::wmma::load_matrix_sync(local_slices_b[i], smem_B_ptr, LdmSmemB); - } - - // Load A strip - #pragma unroll - for (int i = 0; i < WmmaBlocksY; ++i) - { - value_a_t const *smem_A_ptr = - &smem_A_base[tile_offset_k * kstride_a + (block_warp_item_coords.y + WmmaItemsY * i) * lstride_a]; - - nvcuda::wmma::load_matrix_sync(local_slices_a[i], smem_A_ptr, LdmSmemA); - } - } - - - //------------------------------------------------------------------------- - // Epilogue - //------------------------------------------------------------------------- - - /** - * Performs the GEMM epilogue: - * - Applies the scalar multipliers and addends to the accumulators - * - Write the result to the output matrix - */ - inline __device__ void epilogue() - { - // Wait for predecessor thread block(s) to produce partial-sums - k_split.wait(); - - // Configure epilogue as to whether the thread block is a secondary - // accumulator in an inter-block k-splitting scheme - if (k_split.is_secondary_accumulator()) - epilogue_op.set_secondary_accumulator(); - - // Whether or not the addend from C needs loading - bool must_init_addend = epilogue_op.must_init_addend(); - - int warp_base_x = grid_raster.block_item_coords.x + block_warp_item_coords.x; - int warp_base_y = grid_raster.block_item_coords.y + block_warp_item_coords.y; - - int constexpr SmemStride = WmmaItemsY; - - int warp_id = threadIdx.x / 32; - - // Compute shape of one accumulator read/modify/write operation - int constexpr ItemsY = (WmmaItemsY); - int constexpr ItemsX = (32 / ItemsY); - int constexpr IterationsX = WmmaItemsX / ItemsX; - - // Compute a rasterization of warp lanes across the WMMA tile. 
- int lane_id = (threadIdx.x % 32); - int lane_read_x = (lane_id / ItemsY); - int lane_read_y = (lane_id % ItemsY); - - accum_t *smem_scratch = scratch->epilogue[warp_id]; - accum_t const *smem_read_ptr = smem_scratch + lane_read_y + lane_read_x * SmemStride; - - #pragma unroll - for (int xb = 0; xb < WmmaBlocksX; ++xb) - { - #pragma unroll - for (int yb = 0; yb < WmmaBlocksY; ++yb) - { - // Store accumulator tile to SMEM - nvcuda::wmma::store_matrix_sync( - smem_scratch, - accumulator.accumulators[xb][yb], - SmemStride, - matrix_layout::kind); - - // Synchronize threads within the warp - __syncthreads(); - - // Compute lane coordinates so that each thread efficiently accesses SMEM. - int c_x = (warp_base_x + (xb) * WmmaItemsX + lane_read_x); - int c_y = (warp_base_y + (yb) * WmmaItemsY + lane_read_y); - - // Compute guard predicate by comparing against problem dimensions. - bool pred = c_y < dim_m; - - // Compute output pointer from lane coordinates - int c_index = c_x * dim_m + c_y; - accum_t *c_ptr = reinterpret_cast(d_c) + c_x * dim_m + c_y; - - // Iterate over columns of output tile. Load from SMEM, compute epilogue operation, - // and stream output to global memory - #pragma unroll - for (int item_x = 0; item_x < IterationsX; ++item_x) - { - accum_t accum = smem_read_ptr[item_x * ItemsX * SmemStride]; - accum_t c_element = 0; - - // Filter against problem dimensions as the warp iterates across the columns of - // output. - pred = (pred && ((c_x + item_x * ItemsX) < dim_n)); - - if (must_init_addend && pred) - { - // NB: inline PTX to utilize strong operations for inter-block synchronization. - // The following is equivalent to: - // - // c_element = c_ptr[0]; - asm volatile ("ld.global.cg.f32 %0, [%1];\n" : "=f"(c_element) : "l"(c_ptr)); - } - - c_element = epilogue_op(accum, c_element, c_index); - - if (pred) - { - // NB: inline PTX to utilize strong operations for inter-block synchronization. 
- // The following is equivalent to: - // - // c_ptr[0] = c_element; - - asm volatile ("st.global.cg.f32 [%0], %1;\n" : : "l"(c_ptr), "f"(c_element)); - } - - // Increment output pointer - c_ptr += dim_m * ItemsX; - c_index += dim_m * ItemsX; - } - __syncthreads(); - } - } - - // Signal k-split successor thread_block - k_split.signal(); - } - - //------------------------------------------------------------------------- - // Tile consumption - //------------------------------------------------------------------------- - - /** - * Consume a tile of A and B each - */ - template - inline __device__ - void consume_tile() - { - // Request global prefetch for next tile on first strip - if (DoGlobalPrefetch) - { - loader_b.request(); - loader_b.next(); - loader_a.request(); - loader_a.next(); - } - - // Unroll BlockDpVectorsK iterations of outer-product accumulations - #pragma unroll - for (int iteration = 0; iteration < WmmaUnrollCount; ++iteration) - { - int tile_offset_k = iteration * WmmaItemsK; - - // Active load-from-shared index - int active_lds_idx = __NV_STD_MIN(WmmaUnrollCount - 1, (iteration) % 2); - - // Next load-from-shared index - int next_lds_idx = __NV_STD_MIN(WmmaUnrollCount - 1, (iteration + 1) % 2); - - // The last unrolled iteration commits the global fetches - if ((iteration == WmmaUnrollCount - 1) && DoGlobalPrefetch) - { - // If not using two pages of scratch tiles, protect the above prefetch loads from - // the committing writes below - if (!UseDoubleScratchTiles) - { - __syncthreads(); - } - else - { - page_idx = (page_idx ? 
0 : 1); - } - - // Commit global prefetch data to scratch page - loader_a.template commit(&scratch->pages[page_idx].alias().block_a[0][0]); - loader_b.template commit(&scratch->pages[page_idx].alias().block_b[0][0]); - - __syncthreads(); - } - - // Accumulate this dp-stripe product - accumulator.multiply_accumulate( - local_slices_a[active_lds_idx], - local_slices_b[active_lds_idx]); - - // Request local prefetch for next strip - request_local_prefetch( - local_slices_a[next_lds_idx], - local_slices_b[next_lds_idx], - (tile_offset_k + WmmaItemsK) % BlockItemsK); - } - } - - //------------------------------------------------------------------------- - // GEMM API - //------------------------------------------------------------------------- - - /** - * Compute GEMM - */ - inline __device__ - void run() - { - // Quit if the thread block is fully out-of-bounds - if (grid_raster.is_block_oob(dim_m, dim_n)) - { - asm volatile("exit;"); - } - - // Request global prefetch of first tile - loader_a.request(); - loader_a.next(); - loader_b.request(); - loader_b.next(); - - // Commit global prefetch of first tile to shared memory - loader_a.template commit(&scratch->pages[page_idx].alias().block_a[0][0]); - loader_b.template commit(&scratch->pages[page_idx].alias().block_b[0][0]); - - // Advance to next A,B tiles in K-axis - block_item_coords_k += BlockItemsK; - - // Synchronize shared tiles and prepared accumulator - __syncthreads(); - - // Initialize thread's slice of accumulators - accumulator.init(); - - // Request first iteration of local prefetch strips - request_local_prefetch( - local_slices_a[0], - local_slices_b[0], - 0); - - // - // Main loop - // - - // Consume tiles in A and B along the K-axis (all but last tile) - #pragma unroll 1 - while (block_item_coords_k < block_end_item_k) - { - consume_tile(); - - // Advance to next A,B tiles in K-axis - block_item_coords_k += BlockItemsK; - } - - consume_tile(); - - // - // Eplilogue - // - - // prevent overwriting SMEM 
until all warps have finished loading data - __syncthreads(); - - // store accumulator tile to global memory - epilogue(); - } -}; - -} // namespace gemm -} // namespace cutlass - -#endif diff --git a/cutlass/gemm/clear_accumulators.h b/cutlass/gemm/clear_accumulators.h new file mode 100644 index 00000000..12e1f579 --- /dev/null +++ b/cutlass/gemm/clear_accumulators.h @@ -0,0 +1,55 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines abstractions for efficiently clearing accumulator tiles. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct ClearAccumulators { + /// The shared storage. + struct SharedStorage {}; + + /// Ctor. + CUTLASS_DEVICE ClearAccumulators(SharedStorage& shared_storage) {} + + /// Clear the fragment. + template + CUTLASS_DEVICE void clear(Fragment_& fragment) { + fragment.clear(); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/dgemm_traits.h b/cutlass/gemm/dgemm_traits.h new file mode 100644 index 00000000..0bbc2210 --- /dev/null +++ b/cutlass/gemm/dgemm_traits.h @@ -0,0 +1,127 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural traits of double-precision GEMM. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The tile size for the GEMM KxNxM. + typename OutputTile_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_ = 1, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_ = 1> +struct DgemmConfig + : public GemmConfig< + /// The scalar type for A. 
+ double, + /// The scalar type for B. + double, + /// The scalar type for C. + double, + /// The scalar type for D. + double, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, double, double, double>, + /// The number of scalars per LDG for A. + kScalarsPerLdgA_, + /// The number of scalars per STS for A. + kScalarsPerLdgA_, + /// The number of scalars per LDS for A. + 2, + /// The number of scalars per LDG for B. + kScalarsPerLdgB_, + /// The number of scalars per STS for B. + kScalarsPerLdgB_, + /// The number of scalars per LDS for B. + 2, + /// The number of scalars per LDG for C and STG for D. + 1, + /// The number of scalars per STS for D. + 2, + /// The number of scalars per LDS for D. + 1, + /// The number of stages in shared memory. + 2> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_ = Shape<8, 64, 128>, + /// The functor to use in the epilogue. + typename EpilogueFunctor_ = LinearScaling, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<8, 8, 8>, + /// The number of doubles loaded in one LDG for A. + int kScalarsPerLdgA_ = 1, + /// The number of doubles loaded in one LDG for B. + int kScalarsPerLdgB_ = 1, + /// The index. + typename Index_ = int, + /// The DGEMM config. + typename GemmConfig_ = + DgemmConfig, + /// The traits class for the epilogue. + typename GemmEpilogueTraits_ = + SimplifiedGemmEpilogueTraits > +struct DgemmTraits : public SimplifiedGemmTraits< + // The layout for A. + kLayoutA_, + // The layout for B. + kLayoutB_, + // The config. + GemmConfig_, + // The epilogue. + GemmEpilogue, + // The index. 
+ Index_> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/dispatch.h b/cutlass/gemm/dispatch.h deleted file mode 100644 index ae1819a6..00000000 --- a/cutlass/gemm/dispatch.h +++ /dev/null @@ -1,542 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * GEMM kernel entrypoint and dispatch stub - */ - -#include - -#include "../util/util.h" -#include "block_task.h" -#include "block_task_wmma.h" -#include "grid_raster.h" -#include "dispatch_policies.h" -#include "k_split_control.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * param_pack - ******************************************************************************/ - -/** - * Parameter-pack structure - * - * Kernel launch latency is reduced when kernel arguments are wrapped into - * a single parameter - */ -template < - typename value_t, - typename accum_t, - typename epilogue_op_t> -struct param_pack -{ - int m; ///< Height in rows of op(A) and C - int n; ///< Width in columns of op(B) and C - int k; ///< Width in columns of op(A) and height in rows of op(B) - k_split_control k_split; ///< Abstraction for controlling inter-block k-splitting - value_t *d_a; ///< Pointer to matrix A array values - value_t *d_b; ///< Pointer to matrix B array values - accum_t *d_c; ///< Pointer to matrix C array values - epilogue_op_t epilogue_op; - - param_pack( - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - k_split_control k_split, ///< Abstraction for controlling inter-block k-splitting - epilogue_op_t op, ///< Epilogue operation to update matrix C - value_t *d_a, ///< Pointer to matrix A array values - value_t *d_b, ///< Pointer to matrix B array values - accum_t *d_c) ///< Pointer to matrix C array values - : - m(m), - n(n), - k(k), - k_split(k_split), - epilogue_op(op), - d_a(d_a), - d_b(d_b), - d_c(d_c) - {} - -}; - - -/****************************************************************************** - * Conditionally select the appropriate GEMM threadblock task - 
******************************************************************************/ - -/// Conditional selection for block task -template < - math_operation_class_t math_op, ///< - typename block_task_policy_t, ///< Parameterization of block_task_policy - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - int LdgAlignA, ///< Alignment (in bytes) for A operand - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - int LdgAlignB, ///< Alignment (in bytes) for B operand - typename epilogue_op_t, ///< Epilogue operation applied to GEMM - int LdgAlignC, ///< Alignment (in bytes) for C operand - bool AllowRaggedTiles ///< Whether GEMM supports matrix sizes other than multiple of BlockItems{XY} -> -struct gemm_block_task; - -/// Scalar math operations -template < - typename block_task_policy_t, ///< Parameterization of block_task_policy - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - int LdgAlignA, ///< Alignment (in bytes) for A operand - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - int LdgAlignB, ///< Alignment (in bytes) for B operand - typename epilogue_op_t, ///< Epilogue operation applied to GEMM - int LdgAlignC, ///< Alignment (in bytes) for C operand - bool AllowRaggedTiles ///< Whether GEMM supports matrix sizes other than multiple of BlockItems{XY} -> -struct gemm_block_task< - math_operation_class_t::scalar, - block_task_policy_t, - value_t, - accum_t, - TransformA, - LdgAlignA, - TransformB, - LdgAlignB, - epilogue_op_t, - LdgAlignC, - AllowRaggedTiles -> -{ - // Parameterize task type - typedef block_task< - block_task_policy_t, - value_t, - accum_t, - 
TransformA, - LdgAlignA, - TransformB, - LdgAlignB, - epilogue_op_t, - LdgAlignC, - AllowRaggedTiles> type; -}; - -/// Matrix math operations -template < - typename block_task_policy_t, ///< Parameterization of block_task_policy - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - int LdgAlignA, ///< Alignment (in bytes) for A operand - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - int LdgAlignB, ///< Alignment (in bytes) for B operand - typename epilogue_op_t, ///< Epilogue operation applied to GEMM - int LdgAlignC, ///< Alignment (in bytes) for C operand - bool AllowRaggedTiles ///< Whether GEMM supports matrix sizes other than multiple of BlockItems{XY} -> -struct gemm_block_task< - math_operation_class_t::matrix, - block_task_policy_t, - value_t, - accum_t, - TransformA, - LdgAlignA, - TransformB, - LdgAlignB, - epilogue_op_t, - LdgAlignC, - AllowRaggedTiles> -{ - -#if defined(WMMA) // conditional compilation with WMMA headers - - // Parameterize task type - typedef block_task_wmma< - block_task_policy_t, - value_t, - accum_t, - TransformA, - LdgAlignA, - TransformB, - LdgAlignB, - epilogue_op_t, - LdgAlignC, - AllowRaggedTiles> type; - -#endif -}; - -/****************************************************************************** - * GEMM kernel entrypoint - ******************************************************************************/ - -/** - * GEMM kernel - * - * NB: Not sure why NVVM is doing stuff with "__launch_bounds__" instead of just - * passing it along to PTXAS, but it is currently resulting in less optimal codegen - */ -template < - math_operation_class_t math_op, ///< Indicates which class of math operation to select - typename block_task_policy_t, ///< Parameterization of block_task_policy - matrix_transform_t::kind_t TransformA, ///< 
Transformation op for matrix A - int LdgAlignA, ///< Alignment of A matrix elements in bytes - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - int LdgAlignB, ///< Alignment of B matrix elements in bytes - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - typename epilogue_op_t, ///< Epilogue operation applied to update matrix C - int LdgAlignC, ///< Alignment of C elements in bytes - bool AllowRaggedTiles> ///< Boolean to indicate whether AllowRaggedTiles handling is enabled -__global__ void kernel(param_pack pack) -{ - // Parameterize task type - typedef typename gemm_block_task< - math_op, - block_task_policy_t, - value_t, - accum_t, - TransformA, - LdgAlignA, - TransformB, - LdgAlignB, - epilogue_op_t, - LdgAlignC, - AllowRaggedTiles>::type block_task_t; - - // Declare statically-allocated shared storage - __shared__ typename block_task_t::scratch_storage_t smem; - - // Construct and run the task - block_task_t( - &smem, - pack.d_a, - pack.d_b, - pack.d_c, - pack.epilogue_op, - pack.m, - pack.n, - pack.k, - pack.k_split).run(); -} - - -/****************************************************************************** - * Launch configuration description returned to the caller - ******************************************************************************/ - -/// Return details about the launch configuration to the caller -struct launch_configuration -{ - // - // Data members - // - - /// cudaError_t resulting from grid launch - cudaError_t result; - - /// Extent of a thread block's partition along the GEMM K-axis - int split_k; - - /// Kernel grid extents in thread blocks - dim3 grid; - - /// Thread block extents in threads - dim3 block; - - // - // Methods - // - - /// Constructor - launch_configuration(): - result(cudaSuccess), - split_k(0), - grid(0, 0, 0), - block(0, 0, 0) { - - } - - /// Conversion from cudaError_t - 
launch_configuration(cudaError_t result): - result(result), - split_k(1), - grid(0, 0, 0), - block(0, 0, 0) { - - } - - /// Launch configuration for Cutlass kernels - launch_configuration( - cudaError_t result, - int split_k, - dim3 grid, - dim3 block - ): - result(result), - split_k(split_k), - grid(grid), - block(block) { - - } -}; - - -/****************************************************************************** - * Dispatch stub - ******************************************************************************/ - -/** - * GEMM dispatch stub - * - * This function also serves as the autotuning entrypoint to evaluate different - * tuning parameterizations of kernel. - */ -template < - math_operation_class_t math_op, ///< Indicates which class of math operation to select - typename block_task_policy_t, ///< Parameterization of block_task_policy - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - int LdgAlignA, ///< Alignment of A matrix elements in bytes - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - int LdgAlignB, ///< Alignment of B matrix elements in bytes - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - typename epilogue_op_t, ///< Epilogue operation - int LdgAlignC, ///< Alignment of C matrix elements in bytes - bool AllowRaggedTiles, ///< Boolean to indicate whether AllowRaggedTiles handling is enabled - typename kernel_ptr_t> ///< GEMM kernel function pointer type -launch_configuration dispatch( - kernel_ptr_t kernel_ptr, ///< GEMM kernel function pointer - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - epilogue_op_t epilogue_op, ///< Epilogue operation to update matrix C - value_t *d_a, ///< Device pointer to matrix A array values - value_t *d_b, ///< Device pointer to matrix B array values - accum_t 
*d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = true) ///< Whether or not to synchronize the stream after every kernel launch - /// to check for errors. Also causes launch configurations to be printed - /// to the console if DEBUG is defined. Default is \p false. -{ - // Thread block rasterization type - typedef grid_raster< - block_task_policy_t::BlockItemsY, - block_task_policy_t::BlockItemsX, - TransformA, - TransformB, - block_task_policy_t::RasterStrategy> - grid_raster_t; - - launch_configuration config; - - // Compute block dims - config.block = dim3(block_task_policy_t::BlockThreads); - - // Compute shared memory - int dynamic_smem_bytes = 0; - - // Compute occupancy - int max_sm_occupancy; - if (CUDA_PERROR_DEBUG(config.result = cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &max_sm_occupancy, - kernel_ptr, - config.block.x * config.block.y, - dynamic_smem_bytes))) - { - return config; - } - - // Compute grid extents - config.grid = grid_raster_t::grid_dims(m, n); - - // Get SM count - int sm_count; - if (CUDA_PERROR_DEBUG(config.result = get_sm_count(sm_count))) - return config; - - // Get k-split flag storage (TODO: make a pool) - int *d_flags; - if (CUDA_PERROR_DEBUG(config.result = cudaGetSymbolAddress((void**) &d_flags, d_flags_split_k))) - return config; - - // Construct k-split coordinator - k_split_control k_split( - d_flags, - sm_count, - max_sm_occupancy, - k, - block_task_policy_t::BlockItemsK, - config.block, - config.grid); // in,out - - config.split_k = k_split.split_k; - - // Log kernel configuration - if (debug_synchronous) - { - // Compute tiling efficiency - float block_tiling_efficiency = float(block_task_policy_t::BlockItemsY * block_task_policy_t::BlockItemsX) / - float(block_task_policy_t::BlockItemsY + block_task_policy_t::BlockItemsX); - - float tiling_efficiency = block_tiling_efficiency; - - float 
wave_efficiency = k_split.get_wave_efficiency( - sm_count, max_sm_occupancy, config.block, config.grid); - - CUDA_LOG_DEBUG("Final wave_efficiency %.4f, tiling_efficiency %.4f\n", - wave_efficiency, tiling_efficiency); - - CUDA_LOG_DEBUG("Invoking kernel<<<(%d, %d, %d), (%d.y,%d.x), %d, %lld>>>(), %d SM occupancy, %d split_k\n", - config.grid.x, config.grid.y, config.grid.z, - config.block.y, config.block.x, - dynamic_smem_bytes, - (long long) stream, - max_sm_occupancy, - k_split.split_k); - } - - // Construct parameter-pack - param_pack pack( - m, - n, - k, - k_split, - epilogue_op, - d_a, - d_b, - d_c); - - // Prepare k-split coordinator - if (CUDA_PERROR_DEBUG(config.result = k_split.prepare(stream, debug_synchronous))) - { - return config; - } - - // Invoke kernel - kernel_ptr<<< config.grid, config.block, dynamic_smem_bytes, stream >>>(pack); - - // Check for failure to launch - if (CUDA_PERROR_DEBUG(config.result = cudaPeekAtLastError())) - return config; - - // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CUDA_PERROR_DEBUG(config.result = cudaStreamSynchronize(stream)))) - return config; - - return config; -} - - -/****************************************************************************** - * GEMM - ******************************************************************************/ - -/** - * Computes gemm on device matrices - */ -template < - tiling_strategy::kind_t TilingStrategy, ///< Tile-sizing classification - math_operation_class_t math_op, ///< Indicates which class of math operation to select - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - int LdgAlignA, ///< Alignment (in bytes) of A operand - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - int LdgAlignB, ///< Alignment (in bytes) of B operand - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - typename 
epilogue_op_t, ///< Epilogue operation to update matrix C - int LdgAlignC> ///< Alignment (in bytes) of C operand -launch_configuration device_gemm( - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - epilogue_op_t epilogue_op, ///< Epilogue operation to update matrix C - value_t *d_a, ///< Device pointer to matrix A array values - value_t *d_b, ///< Device pointer to matrix B array values - accum_t *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to - /// check for errors. Also causes launch configurations to be printed to - /// the console if DEBUG is defined. Default is \p false. -{ - // Parameterize an task policy type - // (TODO: use a policy dispatch mechanism based upon SM version) - typedef gemm_policy block_task_policy_t; - - // AllowRaggedTiles-tile check - if ((m % block_task_policy_t::BlockItemsY != 0) || - (n % block_task_policy_t::BlockItemsX != 0) || - (k % block_task_policy_t::BlockItemsK != 0)) - { - // Needs ragged tile-handling - static const bool AllowRaggedTiles = true; - - return dispatch( - kernel, - m, - n, - k, - epilogue_op, - d_a, - d_b, - d_c, - stream, - debug_synchronous); - } - else - { - // Does not need ragged tile-handling - static const bool AllowRaggedTiles = false; - - return dispatch( - kernel, - m, - n, - k, - epilogue_op, - d_a, - d_b, - d_c, - stream, - debug_synchronous); - } - - -} - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/dispatch_policies.h b/cutlass/gemm/dispatch_policies.h deleted file mode 100644 index 298f7be5..00000000 --- a/cutlass/gemm/dispatch_policies.h +++ /dev/null @@ -1,661 +0,0 @@ -/****************************************************************************** - * Copyright 
(c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Architecture-specific GEMM block_task policies - */ - -#include - -#include "../util/util.h" -#include "block_task.h" -#include "grid_raster.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * tiling_strategy - ******************************************************************************/ - -/** - * Enumeration of tile-sizing granularities - */ -struct tiling_strategy : printable_t -{ - /// \brief Enumerants - enum kind_t - { - Unknown, - Small, - Medium, - Large, - Tall, - Wide, - Huge, - }; - - /// Enumerant value - kind_t kind; - - /// Default constructor - tiling_strategy() : kind(Unknown) {} - - /// Copy constructor - tiling_strategy(const kind_t &other_kind) : kind(other_kind) {} - - /// Cast to kind_t - operator kind_t() const { return kind; } - - /// Returns the instance as a string - __host__ __device__ inline - char const* to_string() const - { - switch (kind) - { - case Small: return "small"; - case Medium: return "medium"; - case Large: return "large"; - case Tall: return "tall"; - case Wide: return "wide"; - case Huge: return "huge"; - case Unknown: - default: return "unknown"; - } - } - - /// Insert the formatted instance into the output stream - void print(std::ostream& out) const { out << to_string(); } -}; - - -/****************************************************************************** - * GEMM - ******************************************************************************/ - -/** - * GEMM task policy specialization for sgemm - */ -template < - typename value_t, - typename accum_t, - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - tiling_strategy::kind_t TilingStrategy> ///< Tile-sizing classification -struct gemm_policy; - - 
-/****************************************************************************** - * SGEMM - ******************************************************************************/ - -/** - * GEMM task policy specialization for small sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 16, // _BlockItemsY - 16, // _BlockItemsX - 16, // _BlockItemsK - 2, // _ThreadItemsY - 2, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/** - * GEMM task policy specialization for medium sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 32, // _BlockItemsY - 32, // _BlockItemsX - 8, // _BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 64, // _BlockItemsY - 64, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for tall sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 128, // _BlockItemsY - 32, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 4, // _ThreadItemsX - false, // 
_UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for wide sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 32, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 4, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for huge sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 128, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/****************************************************************************** - * DGEMM - ******************************************************************************/ - -/** - * GEMM task policy specialization for small dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 16, // _BlockItemsY - 16, // _BlockItemsX - 16, // _BlockItemsK - 2, // _ThreadItemsY - 2, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/** - * GEMM task policy specialization for medium dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 32, // _BlockItemsY - 32, // 
_BlockItemsX - 16, // _BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 64, // _BlockItemsY - 64, // _BlockItemsX - 8, // _BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for tall dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 128, // _BlockItemsY - 32, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for wide dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 32, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 4, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for huge dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 64, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 8, // _ThreadItemsX - false, // 
_UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/****************************************************************************** - * HGEMM - ******************************************************************************/ - -/** - * GEMM task policy specialization for small hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Small> : - block_task_policy< - 32, // _BlockItemsY - 32, // _BlockItemsX - 8, // _BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for medium hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Medium> : - block_task_policy< - 32, // _BlockItemsY - 32, // _BlockItemsX - 16, // _BlockItemsK - 8, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Large> : - block_task_policy< - 64, // _BlockItemsY - 64, // _BlockItemsX - 8, // _BlockItemsK - 16, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for tall hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< 
Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Tall> : - block_task_policy< - 128, // _BlockItemsY - 32, // _BlockItemsX - 8, // _BlockItemsK - 16, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/** - * GEMM task policy specialization for wide hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Wide> : - block_task_policy< - 32, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for huge hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Huge> : - block_task_policy< - 128, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 16, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/****************************************************************************** - * IGEMM - ******************************************************************************/ - -/** - * GEMM task policy specialization for small igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 16, // _BlockItemsY - 32, // _BlockItemsX - 32, // 
_BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/** - * GEMM task policy specialization for medium igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 32, // _BlockItemsY - 32, // _BlockItemsX - 32, // _BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 64, // _BlockItemsY - 64, // _BlockItemsX - 32, // _BlockItemsK - 8, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 128, // _BlockItemsY - 64, // _BlockItemsX - 64, // _BlockItemsK - 8, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 64, // _BlockItemsY - 128, // _BlockItemsX - 64, // _BlockItemsK - 4, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - 
grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for huge igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 128, // _BlockItemsY - 128, // _BlockItemsX - 32, // _BlockItemsK - 8, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/****************************************************************************** - * WMMA GEMM - ******************************************************************************/ - -// WMMA is a preview feature in CUDA. Conditionally enable wmma_gemm policies. -#if defined(WMMA) - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - gemm::block_task_wmma_policy< - 16, // _BlockItemsY - 16, // _BlockItemsX - 16, // _BlockItemsK - 16, // _WarpItemsY - 16, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // _RasterStrategy -{}; - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - gemm::block_task_wmma_policy< - 32, // _BlockItemsY - 32, // _BlockItemsX - 32, // _BlockItemsK - 32, // _WarpItemsY - 32, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // _RasterStrategy -{}; - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy< half, float, 
TransformA, TransformB, tiling_strategy::Large> : - gemm::block_task_wmma_policy< - 64, // _BlockItemsY - 64, // _BlockItemsX - 32, // _BlockItemsK - 32, // _WarpItemsY - 64, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // _RasterStrategy -{}; - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy< half, float, TransformA, TransformB, tiling_strategy::Tall> : - gemm::block_task_wmma_policy< - 128, // _BlockItemsY - 64, // _BlockItemsX - 64, // _BlockItemsK - 32, // _WarpItemsY - 64, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // _RasterStrategy -{}; - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy< half, float, TransformA, TransformB, tiling_strategy::Wide> : - gemm::block_task_wmma_policy< - 64, // _BlockItemsY - 128, // _BlockItemsX - 64, // _BlockItemsK - 32, // _WarpItemsY - 64, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // _RasterStrategy -{}; - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy< half, float, TransformA, TransformB, tiling_strategy::Huge> : - gemm::block_task_wmma_policy< - 128, // _BlockItemsY - 128, // _BlockItemsX - 64, // _BlockItemsK - 32, // _WarpItemsY - 64, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // 
_RasterStrategy -{}; - -#endif - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/dp_accummulate.h b/cutlass/gemm/dp_accummulate.h deleted file mode 100644 index 6b5d4b6c..00000000 --- a/cutlass/gemm/dp_accummulate.h +++ /dev/null @@ -1,223 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Abstraction for exposing architecture-specific "dot-product-accumulate" - * ISA operations - */ - -#include - -#include "../util/util.h" - - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * dp_accummulate - ******************************************************************************/ - - -/** - * \brief Abstraction for exposing architecture-specific "dot-product-accumulate" - * ISA operations - * - * Given two K-component vectors a and b having type value_t[K] and an addend c - * of type accum_t, the "dot-product-accumulate" of type accum_t is computed - * as d = x[0]*y[0] + x[1]*y[1] + ... + x[K-1]*y[K-1] + c. - * - * We use the notation "dpK" to connote a K-component dot-product-accumulate. - * For example, "dp1" is a simple multiply-add. - * - * For given pairing of value_t and accum_t types, the corresponding - * dp_accummulate class will: - * - * - Define the member-type dp_vector_t as the appropriate K-component vector - * type needed to leverage architecture-specific "dot-product accumulate" - * ISA operations. - * - Implement the corresponding dot-product operation between two dp_vector_t - * inputs a and b. 
- * - */ -template < - typename value_t, ///< Component value type - typename accum_t> ///< Accumulator value type -struct dp_accummulate; - - - -/// Default "dp1" dot-product-accumulate traits specialization for value_t->accum_t -template < - typename value_t, ///< Component value type - typename accum_t> ///< Accumulator value type -struct dp_accummulate -{ - /// Single-component "dp1" dot-product vector type - typedef value_t dp_vector_t; - - - /// Compute "dp1" float->float - inline __device__ - static void mad( - float &d, - const float &a, - const float &b, - const float &c) - { - asm volatile ( "fma.rn.f32 %0, %1, %2, %3;\n" - : "=f"(d) : "f"(a), "f"(b), "f"(c)); - } - - - /// Compute "dp1" double->double - inline __device__ - static void mad( - double &d, - const double &a, - const double &b, - const double &c) - { - asm volatile ("fma.rn.f64 %0, %1, %2, %3;\n" - : "=d"(d) : "d"(a), "d"(b), "d"(c)); - } - - - /// Compute "dp1" int16_t->int32_t - inline __device__ - static void mad( - int32_t &d, - const int16_t &a, - const int16_t &b, - const int32_t &c) - { - asm volatile ("mad.wide.s16 %0, %1, %2, %3;\n" - : "=r"(d) : "h"(a), "h"(b), "r"(c)); - } - - - /// Compute "dp1" uint16_t->uint32_t - inline __device__ - static void mad( - uint32_t &d, - const uint16_t &a, - const uint16_t &b, - const uint32_t &c) - { - asm volatile ("mad.wide.u16 %0, %1, %2, %3;\n" - : "=r"(d) : "h"(a), "h"(b), "r"(c)); - } - - - /// Compute "dp1" int32_t->int32_t - inline __device__ - static void mad( - int32_t &d, - const int32_t &a, - const int32_t &b, - const int32_t &c) - { - asm volatile ("mad.lo.s32 %0, %1, %2, %3;\n" - : "=r"(d) : "r"(a), "r"(b), "r"(c)); - } - - - /// Compute "dp1" uint32_t->uint32_t - inline __device__ - static void mad( - uint32_t &d, - const uint32_t &a, - const uint32_t &b, - const uint32_t &c) - { - asm volatile ("mad.lo.u32 %0, %1, %2, %3;\n" - : "=r"(d) : "r"(a), "r"(b), "r"(c)); - } - -}; - - - -#if (CUTLASS_ARCH >= 610) // Specializations only 
enabled for Pascal SM610+ - - -/// "dp4" dot-product-accumulate traits specialization for int8_t->int32_t -template <> -struct dp_accummulate< - int8_t, ///< Component value type - int32_t> ///< Accumulator value type -{ - /// Four-component signed "idp4" - typedef int32_t dp_vector_t; - - /// Compute "dp4" int16_t->int32_t - inline __device__ - static void mad( - int32_t &d, - const int32_t &a, - const int32_t &b, - const int32_t &c) - { - asm volatile ( "dp4a.s32.s32 %0, %1, %2, %3;\n" - : "=r"(d) : "r"(a), "r"(b), "r"(c)); - } -}; - - -/// "dp4" dot-product-accumulate traits specialization for uint8_t->uint32_t -template <> -struct dp_accummulate< - uint8_t, ///< Component value type - uint32_t> ///< Accumulator value type -{ - /// Four-component unsigned "idp4" - typedef uint32_t dp_vector_t; - - /// Compute "dp4" uint16_t->uint32_t - inline __device__ - static void mad( - uint32_t &d, - const uint32_t &a, - const uint32_t &b, - const uint32_t &c) - { - asm volatile ( "dp4a.u32.u32 %0, %1, %2, %3;\n" - : "=r"(d) : "r"(a), "r"(b), "r"(c)); - } -}; - - -#endif // Specializations only enabled for Pascal SM610+ - - -} // namespace gemm -} // namespace cutlass - diff --git a/cutlass/gemm/epilogue_function.h b/cutlass/gemm/epilogue_function.h deleted file mode 100644 index 714dd817..00000000 --- a/cutlass/gemm/epilogue_function.h +++ /dev/null @@ -1,104 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Epilogue operation to compute final output - */ - -namespace cutlass { -namespace gemm { - - //// Used by GEMM to compute the final result C <= alpha * accumulator + beta * C - template < - typename accum_t, - typename output_t, - typename scalar_t - > - class blas_scaled_epilogue - { - public: - - scalar_t alpha; - scalar_t beta; - - inline __device__ __host__ - blas_scaled_epilogue( - scalar_t alpha, - scalar_t beta) - : - alpha(alpha), - beta(beta) - {} - - - /// Epilogue operator - inline __device__ __host__ - output_t operator()( - accum_t accumulator, - output_t c, - size_t idx) const - { - return output_t(alpha * scalar_t(accumulator) + beta * scalar_t(c)); - } - - - /// Epilogue operator - inline __device__ __host__ - output_t operator()( - accum_t accumulator, - size_t idx) const - { - return output_t(alpha * scalar_t(accumulator)); - } - - /** - * Configure epilogue as to whether the thread block is a secondary - * accumulator in an inter-block k-splitting scheme - */ - inline __device__ - void set_secondary_accumulator() - { - beta = scalar_t(1); - } - - - /// Return whether the beta-scaled addend needs initialization - inline __device__ - bool must_init_addend() - { - return (beta != scalar_t(0)); - } - }; - - - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/gemm.h b/cutlass/gemm/gemm.h new file mode 100644 index 00000000..0ca093ff --- /dev/null +++ b/cutlass/gemm/gemm.h @@ -0,0 +1,319 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements a software-pipelined efficient GEMM. +*/ +#pragma once + +#if !defined(__CUDACC_RTC__) +#include +#endif + +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +__global__ void gemm_kernel(typename Gemm_::Params params) { + // Declare shared memory.
+ __shared__ typename Gemm_::SharedStorage shared_storage; + + // Construct the GEMM object. + Gemm_ gemm(params, shared_storage); + // Run GEMM. + gemm.multiply_add(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmDesc { + /// The dimensions of the GEMM. + Index_ m, n, k; + /// The alpha/beta scaling values. + Scalar_ alpha, beta; + /// The source matrix A. + void const* d_a; + /// The stride for A. + Index_ lda; + /// The source matrix B. + void const* d_b; + /// The stride for B. + Index_ ldb; + /// The source matrix C. + void const* d_c; + /// The stride for C. + Index_ ldc; + /// The destination matrix D. + void* d_d; + /// The stride for D. + Index_ ldd; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Gemm { + /// This class. + typedef Gemm This_; + /// The traits. + typedef GemmTraits_ Traits; + /// The shared storage. + typedef typename Traits::SharedStorage SharedStorage; + + /// The scalar for A. + typedef typename Traits::ScalarA ScalarA; + /// The scalar for B. + typedef typename Traits::ScalarB ScalarB; + /// The scalar in the epilogue. + typedef typename Traits::Epilogue::Scalar ScalarEpilogue; + /// The scalar for C. + typedef typename Traits::Epilogue::ScalarC ScalarC; + /// The scalar for D. + typedef typename Traits::Epilogue::ScalarD ScalarD; + /// The index. + typedef typename Traits::Index Index; + + /// The number of threads. + static int const kThreads = Traits::GemmConfig::kThreads; + + /// The params. 
+ struct Params : public Traits::Params { + CUTLASS_HOST_DEVICE int initialize(Index m, + Index n, + Index k, + ScalarEpilogue alpha, + ScalarA const* d_a, + Index lda, + ScalarB const* d_b, + Index ldb, + ScalarEpilogue beta, + ScalarC const* d_c, + Index ldc, + ScalarD* d_d, + Index ldd) { + GemmDesc desc; + desc.m = m; + desc.n = n; + desc.k = k; + desc.alpha = alpha; + desc.beta = beta; + desc.d_a = reinterpret_cast(d_a); + desc.lda = lda; + desc.d_b = reinterpret_cast(d_b); + desc.ldb = ldb; + desc.d_c = reinterpret_cast(d_c); + desc.ldc = ldc; + desc.d_d = reinterpret_cast(d_d); + desc.ldd = ldd; + return Traits::Params::initialize(desc); + } + }; + +#if !defined(__CUDACC_RTC__) + /// Launch the kernel. + static __host__ cudaError_t launch(Params const& params, + cudaStream_t stream = cudaStreamDefault) { + // Setup the grid. + dim3 grid; + grid.x = (params.m + Traits::OutputTile::kW - 1) / Traits::OutputTile::kW; + grid.y = (params.n + Traits::OutputTile::kH - 1) / Traits::OutputTile::kH; + + // The number of threads. + dim3 block; + block.x = kThreads; + + // Launch the kernel. + void const* params_ = reinterpret_cast(¶ms); + + return cudaLaunchKernel(reinterpret_cast(&gemm_kernel), + grid, + block, + const_cast(¶ms_), + 0, + stream); + } + + /// Launch the kernel. + static __host__ cudaError_t launch(CUfunction kernel, + Params const& params, + CUstream stream = CU_STREAM_LEGACY) { + // Setup the grid. + dim3 grid; + grid.x = (params.m + Traits::OutputTile::kW - 1) / Traits::OutputTile::kW; + grid.y = (params.n + Traits::OutputTile::kH - 1) / Traits::OutputTile::kH; + + // The number of threads. + dim3 block; + block.x = kThreads; + + // Launch the kernel. 
+ void* params_[] = {const_cast(reinterpret_cast(¶ms))}; + + // return cudaLaunchKernel(reinterpret_cast(&gemm_kernel), grid, block, + // const_cast(¶ms_), 0, stream); + CUresult result = cuLaunchKernel( + kernel, grid.x, grid.y, grid.z, block.x, block.y, block.z, 0, stream, params_, 0); + + if (result != CUDA_SUCCESS) { + return cudaErrorLaunchFailure; + } + return cudaSuccess; + } + +#endif + + /// Ctor. + CUTLASS_DEVICE Gemm(Params const& params_, SharedStorage& shared_storage_) + : params(params_), shared_storage(shared_storage_) {} + + /// Do the GEMM. + CUTLASS_DEVICE void multiply_add() { + // Swizzle the IDs of the block (to enable better cache behavior). + typename Traits::BlockSwizzle block_swizzle; + dim3 block = block_swizzle.swizzle(); + + // Scale the id. + block.x *= Traits::OutputTile::kW; + block.y *= Traits::OutputTile::kH; + + // We may want to use shared memory to clear the registers. + typedef typename Traits::ClearAccumulators ClearAccumulators; + + // The streams to read A/B from global memory to shared memory. + typename Traits::GlobalLoadStream global_stream(params, shared_storage, block); + + // Create the accumulator clear. + ClearAccumulators clear(shared_storage.main_loop.clear); + + /// Define the mainloop iteration size + typedef typename Traits::MultiplyAdd MultiplyAdd; + + // By how much we unroll the main loop. + Index const kUnroll = static_cast(MultiplyAdd::AccumulatorsPerWarp::kD); + + // If we do not have enough steps in the main loop, trigger the residue code. + if (params.k < kUnroll) { + global_stream.residue(params.k, true); + } + + // Fetch the fragments for A and B from global memory. + global_stream.copy(); + + // Copy the elements to shared memory (after transformation if needed). + global_stream.commit(); + + // Make sure the data is in shared memory. + Traits::shared_store_fence(false); + + // The unrolling steps for the main loop. 
+ int const kUnrollingSteps = + MultiplyAdd::AccumulatorsPerWarp::kD / MultiplyAdd::InstructionShape::kD; + + // Make sure we have at least 2 unrolling steps or our pipelining is not going to work. + static_assert(kUnrollingSteps >= 2, "The pipelining assumes at least two steps"); + + // The stream of data from shared memory to fragments. + typename Traits::SharedLoadStream shared_load_stream(params, shared_storage); + + // Trigger the copy from shared memory for the 1st stream. + shared_load_stream.copy(0); + + // Allocate the accumulators. + typename MultiplyAdd::Accumulators accumulators; + // Clear the accumulators. + clear.clear(accumulators); + + // Enter the main loop and iterate. + typedef typename Traits::Index Index; + for (Index outer_k = params.k - kUnroll; outer_k > -kUnroll; outer_k -= kUnroll) { + // If that's the last "load iteration" update the predicates. + int const is_residue = outer_k <= kUnroll; + if (is_residue) { + global_stream.residue(outer_k); + } + + // Load data for the next iteration of the main loop. + global_stream.copy(); + + CUTLASS_PRAGMA_UNROLL + for (int step = 0; step < kUnrollingSteps - 1; ++step) { + // Trigger the copy from shared memory for the next A/B values. + shared_load_stream.copy(step + 1); + // Make sure the values are available for the current iteration to do the multiply-add. + shared_load_stream.commit(step); + + // Do the math on the fragments of the current iteration. + MultiplyAdd multiply_add; + multiply_add.multiply_add(shared_load_stream.fragment_a(step), + shared_load_stream.fragment_b(step), + accumulators, + accumulators); + } + + // Make sure the data from shared memory has been entirely consumed. + Traits::shared_load_fence(true); + + // Commit the data in shared memory for A/B. + global_stream.commit(); + + // Make sure the data is in shared memory. + Traits::shared_store_fence(true); + + // Move to the next stage for the load (if it makes sense).
+ shared_load_stream.inc_stage(); + // Trigger the copy from shared memory for the next loop iteration. + shared_load_stream.copy(0); + // Make sure the values are available for the current iteration to do the multiply-add. + shared_load_stream.commit(kUnrollingSteps - 1); + + // Do the math on the fragments of the current iteration. + MultiplyAdd multiply_add; + multiply_add.multiply_add(shared_load_stream.fragment_a(kUnrollingSteps - 1), + shared_load_stream.fragment_b(kUnrollingSteps - 1), + accumulators, + accumulators); + } + + // Epilogue. + typedef typename Traits::Epilogue Epilogue; + Epilogue epilogue(params.epilogue, shared_storage.epilogue, params.m, params.n); + epilogue.epilogue(cutlass::make_Coord(0, block.y, block.x), accumulators); + } + + /// The params. + Params const& params; + /// The shared storage. + SharedStorage& shared_storage; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_epilogue.h b/cutlass/gemm/gemm_epilogue.h new file mode 100644 index 00000000..de6513a4 --- /dev/null +++ b/cutlass/gemm/gemm_epilogue.h @@ -0,0 +1,225 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements the epilogue phase of the GEMM kernel that efficiently updates global memory + with + the computed matrix product. +*/ +#pragma once + +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +CUTLASS_DEVICE bool is_zero(T x) { + return x == T(0); +} + +#if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) +CUTLASS_DEVICE bool is_zero(half x) { return reinterpret_cast(x) == int16_t(0); } +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmEpilogue { + /// The traits class. + typedef GemmEpilogueTraits_ Traits; + /// The params. + typedef typename Traits::Params Params; + /// The shared storage.
+ typedef typename Traits::SharedStorage SharedStorage; + + /// The output tile. + typedef typename Traits::OutputTile OutputTile; + /// The number of iterations. + typedef typename Traits::Iterations Iterations; + /// The accumulators. + typedef typename Traits::Accumulators Accumulators; + /// The scalar. + typedef typename Traits::Scalar Scalar; + /// The functor in charge of the math. + typedef typename Traits::Functor Functor; + + /// We do not support 3D or 4D shapes. + static_assert(Iterations::kD == 1 && Iterations::kC == 1, "Unsupported 3D/4D shapes"); + + /// The iterator for C in global memory. + typedef typename Traits::GlobalLoadIteratorC GlobalLoadIteratorC; + /// The transformer for C. + typedef typename Traits::GlobalTransformerC GlobalTransformerC; + /// The transformer for D. + typedef typename Traits::GlobalTransformerD GlobalTransformerD; + /// The iterator for D in global memory. + typedef typename Traits::GlobalStoreIteratorD GlobalStoreIteratorD; + /// The iterator to store D in shared memory. + typedef typename Traits::SharedStoreIteratorD SharedStoreIteratorD; + /// The shared store transformer for D. + typedef typename Traits::SharedStoreTransformerD SharedStoreTransformerD; + /// The iterator to load D in shared memory. + typedef typename Traits::SharedLoadIteratorD SharedLoadIteratorD; + /// The shared load transformer for D. + typedef Copy SharedLoadTransformerD; + + /// The index. + typedef typename Traits::Index Index; + + /// The scalar for C. + typedef typename GlobalLoadIteratorC::Scalar ScalarC; + /// The scalar for D. + typedef typename GlobalStoreIteratorD::Scalar ScalarD; + + /// Ctor. + CUTLASS_DEVICE GemmEpilogue(Params const& params_, + SharedStorage& shared_storage_, + Index m_, + Index n_) + : params(params_), shared_storage(shared_storage_), m(m_), n(n_) {} + + /// Execute the epilogue. 
+ CUTLASS_DEVICE void epilogue(Coord<3> const& block, Accumulators& accumulators) { + if (is_zero(params.functor.beta)) { + epilogue_with_or_without_beta(block, accumulators); + } else { + epilogue_with_or_without_beta(block, accumulators); + } + } + + template + CUTLASS_DEVICE void epilogue_with_or_without_beta(Coord<3> const& block, + Accumulators& accumulators) { + + Coord<3> const bounds = cutlass::make_Coord(0, n, m); + + // The functor. + Functor functor(params.functor); + // The C fragment. + typename GlobalLoadIteratorC::Fragment fragment_c; + // The transformed C fragment. + typename GlobalTransformerC::OutputFragment transformed_c; + + CUTLASS_PRAGMA_UNROLL + for (int h = 0; h < Iterations::kH; ++h) { + // Compute pointer and predicate offsets for C and D global iterators. + int const pointer_offset = + ((params.iterator_d.inc_h * (GlobalStoreIteratorD::Iterations::kH - 1) + + params.iterator_d.inc_advance) * + Iterations::kW + + params.stride_h) * + h; + int const predicate_offset = + ((params.iterator_d.predicate_inc_h * (GlobalStoreIteratorD::Iterations::kH - 1) + + params.iterator_d.predicate_inc_advance) * + Iterations::kW + + Traits::Delta::kH) * + h; + + // The iterator to load the elements of the C matrix. + GlobalLoadIteratorC global_load_iterator( + params.iterator_c, bounds, block, pointer_offset, predicate_offset); + // The transformer for C. + GlobalTransformerC transformer_c; + // The transformer for D. + GlobalTransformerD transformer_d; + // The iterator to store into the D matrix. + GlobalStoreIteratorD global_store_iterator( + params.iterator_d, bounds, block, pointer_offset, predicate_offset); + + CUTLASS_PRAGMA_UNROLL + for (int w = 0; w < Iterations::kW; ++w) { + // Load the C matrix into fragment. + if (!kBetaIsZero_) { + iterator_load(global_load_iterator, fragment_c); + } + + // Make sure we can write to shared memory. + shared_load_fence(); + + // Copy the accumulators to shared memory. 
+ int const offset = (h * Iterations::kW + w) * SharedStoreIteratorD::Fragment::kElements; + + SharedStoreTransformerD shared_store_transformer; + typename SharedStoreTransformerD::OutputFragment shared_store_transformed_d; + shared_store_transformer.transform(accumulators, offset, shared_store_transformed_d); + + SharedStoreIteratorD shared_store_iterator(params.shared_store_iterator_d, + shared_storage.shared_stream.store); + shared_iterator_store(shared_store_iterator, shared_store_transformed_d); + + // Make sure the data is in shared memory. + shared_store_fence(); + + // Copy the accumulators back to registers from shared memory. + SharedLoadIteratorD shared_load_iterator(params.shared_load_iterator_d, + shared_storage.shared_stream.load); + typename SharedLoadIteratorD::Fragment fetched_d; + shared_iterator_load(shared_load_iterator, fetched_d); + + // Do the math. + typename GlobalTransformerD::InputFragment fragment_d; + + if (kBetaIsZero_) { + functor.evaluate(fetched_d, fragment_d); + } else { + // Transform C fragment. + transformer_c.transform(fragment_c, transformed_c); + // Do the math. + functor.evaluate(fetched_d, transformed_c, fragment_d); + } + + // Transform D fragment. + typename GlobalTransformerD::OutputFragment transformed_d; + transformer_d.transform(fragment_d, transformed_d); + + // Copy the results to global memory. + iterator_store(global_store_iterator, transformed_d); + } + } + } + + /// The memory fence for shared loads. + CUTLASS_DEVICE void shared_load_fence() { __syncthreads(); } + + /// The memory fence for shared stores. + CUTLASS_DEVICE void shared_store_fence() { __syncthreads(); } + + /// The params. + Params const& params; + /// The shared storage. + SharedStorage& shared_storage; + /// The dimensions of the GEMM. 
+ Index m, n; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_epilogue_traits.h b/cutlass/gemm/gemm_epilogue_traits.h new file mode 100644 index 00000000..c06fc250 --- /dev/null +++ b/cutlass/gemm/gemm_epilogue_traits.h @@ -0,0 +1,331 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of the GEMM epilogue. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The output tile. + typename OutputTile_, + /// The accumulators. + typename Accumulators_, + /// The iterator to load C from global memory. + typename GlobalLoadIteratorC_, + /// The transformer for C. + typename GlobalTransformerC_, + /// The transformer for D. + typename GlobalTransformerD_, + /// The iterator to store D to global memory. + typename GlobalStoreIteratorD_, + /// The iterator to store D to shared memory. + typename SharedStoreIteratorD_, + /// The shared store transformer for D. + typename SharedStoreTransformerD_, + /// The iterator to load D from shared memory. + typename SharedLoadIteratorD_, + /// The number of iterations in the epilogue. + typename Iterations_, + /// The iterations strides. + typename Delta_, + /// The functor to be used in the epilogue. + typename Functor_, + /// The index. + typename Index_ = int> +struct GemmEpilogueTraits { + // + /// The output tile. + typedef OutputTile_ OutputTile; + /// The number of iterations. + /// The accumulators.
+ typedef Accumulators_ Accumulators; + /// The iterator for C in global memory. + typedef GlobalLoadIteratorC_ GlobalLoadIteratorC; + /// The transformer for C. + typedef GlobalTransformerC_ GlobalTransformerC; + /// The transformer for D. + typedef GlobalTransformerD_ GlobalTransformerD; + /// The iterator for D in global memory. + typedef GlobalStoreIteratorD_ GlobalStoreIteratorD; + /// The iterator to store D in shared memory. + typedef SharedStoreIteratorD_ SharedStoreIteratorD; + /// The shared store transformer for D. + typedef SharedStoreTransformerD_ SharedStoreTransformerD; + /// The iterator to load D from shared memory. + typedef SharedLoadIteratorD_ SharedLoadIteratorD; + /// typedef typename GemmConfig::EpilogueIterations Iterations; + typedef Iterations_ Iterations; + /// The iterations strides. + typedef Delta_ Delta; + + /// The functor in charge of the math. + typedef Functor_ Functor; + /// The index. + typedef Index_ Index; + + /// We do not support 3D or 4D shapes. + static_assert(Iterations::kD == 1 && Iterations::kC == 1, "Unsupported 3D/4D shapes"); + + /// The scalar. + typedef typename Functor::Scalar Scalar; + /// The scalar for C. + typedef typename GlobalLoadIteratorC::Scalar ScalarC; + /// The scalar for D. + typedef typename GlobalStoreIteratorD::Scalar ScalarD; + + /// The params. + struct Params { + /// The strides for H and W in the different iterations of the epilogue. + Index stride_h, stride_w; + /// The params for the C iterator. + typename GlobalLoadIteratorC::Params iterator_c; + /// The params for the D global iterator. + typename GlobalStoreIteratorD::Params iterator_d; + /// The params for the D shared store iterator. + typename SharedStoreIteratorD::Params shared_store_iterator_d; + /// The params for the D shared load iterator. + typename SharedLoadIteratorD::Params shared_load_iterator_d; + /// The functor params. + typename Functor::Params functor; + + /// Setup the params.
+ template + CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) { + // The parameters for the functor. + int error_code = functor.initialize(desc); + if (error_code) { + return error_code; + } + + // At the end of the H iteration, we jump over a number of columns. + this->stride_h = desc.ldd * Delta::kH; + // Nothing to do here. + this->stride_w = 0; + + // Setup the params for the global memory iterator for C. + error_code = iterator_c.initialize( + reinterpret_cast(desc.d_c), desc.ldc, desc.n, stride_w, Delta::kW); + if (error_code) { + return error_code; + } + + // Setup the params for the global memory iterator for D. + return iterator_d.initialize( + reinterpret_cast(desc.d_d), desc.ldd, desc.n, stride_w, Delta::kW); + } + }; + + /// The shared memory storage to exchange data. + union StreamSharedStorage { + // The storage for the store iterator. + typename SharedStoreIteratorD::SharedStorage store; + // The storage for the load iterator. + typename SharedLoadIteratorD::SharedStorage load; + }; + + /// The shared memory to swizzle the data in the epilogue. + struct SharedStorage { + // The storage for the shared stream D. + StreamSharedStorage shared_stream; + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmEpilogueTraitsHelper { + /// The scalar. + typedef typename EpilogueFunctor_::Scalar Scalar; + /// The output tile. + typedef typename GemmConfig_::OutputTile OutputTile; + + /// The number of iterations in the epilogue. + typedef Shape<1, + GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH / + GemmConfig_::kAccumulatorsPerLdsB, + GemmConfig_::kAccumulatorsPerLdsB> + Iterations; + // The iteration strides in the H/W dimension. + typedef Shape<0, + GemmConfig_::kAccumulatorsPerLdsB*( + GemmConfig_::Warps::kH* GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1), + 0> + Delta; + /// The functor to do the math in the epilogue.
+ typedef EpilogueFunctor_ Functor; + + /// The traits class to build the iterator to store to shared memory for D. + typedef GemmSharedStoreTileDTraits< + // The pointer is float. + typename Functor::Scalar, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The number of scalars per STS. + GemmConfig_::kScalarsPerStsD, + // The skew -- 128 / sizeof(ScalarD) / kScalarsPerStsD is the number of threads involved in + // a single STS. We divide by 2 as our objective is to add a skew to the odd threads to + // avoid bank conflicts between odd and even threads. + 128 / sizeof(typename GemmConfig_::ScalarD) / GemmConfig_::kScalarsPerStsD / 2 * + GemmConfig_::kScalarsPerStsD> + SharedStoreTileTraits; + + /// The iterator to store D to shared memory. + typedef TileStoreIterator + SharedStoreIteratorD; + + /// The shared store transformer for D. + typedef Copy SharedStoreTransformerD; + + /// The traits class to build the iterator to load from shared memory for D. + typedef GemmSharedLoadTileDTraits< + // The pointer is float. + typename Functor::Scalar, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The number of columns of the output tile written by iteration. + GemmConfig_::OutputTile::kH / ShapeCount::kCount, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsD, + // The skew. + SharedStoreTileTraits::kSkew> + SharedLoadTileTraits; + + /// The iterator to load D from shared memory. + typedef TileLoadIterator + SharedLoadIteratorD; + + /// The traits class to build the iterator to load data from global memory for C^N. + typedef GemmGlobalTileCdTraits< + // The pointer is float const. 
+ typename GemmConfig_::ScalarC const, + // The tile has size (N / Iterations)xM in GEMM's terminology. + Shape<1, + GemmConfig_::OutputTile::kH / ShapeCount::kCount, + GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // How many elements do we jump over at each iteration? + Iterations::kW, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgC> + GlobalLoadTileTraits; + + /// The iterator to load C. + typedef GemmGlobalIteratorCd GlobalLoadIteratorC; + /// The transformer for C. + typedef Copy GlobalTransformerC; + + /// The traits class to build the iterator to store data to global memory for D^N. + typedef GemmGlobalTileCdTraits< + // The pointer is float. + typename GemmConfig_::ScalarD, + // The tile has size (N / Iterations)xM in GEMM's terminology. + Shape<1, + GemmConfig_::OutputTile::kH / ShapeCount::kCount, + GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // How many elements do we jump over at each iteration? + Iterations::kW, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerStgD> + GlobalStoreTileTraits; + + /// The iterator to store D. + typedef GemmGlobalIteratorCd GlobalStoreIteratorD; + /// The transformer for D. + typedef Copy GlobalTransformerD; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The GEMM config. + typename GemmConfig_, + /// The epilogue functor to do the math in the epilogue. + typename EpilogueFunctor_, + /// The index. + typename Index_ = int, + /// The helper to create the traits class. + typename Helper_ = GemmEpilogueTraitsHelper > +struct SimplifiedGemmEpilogueTraits : public GemmEpilogueTraits< + // The output tile. 
+ typename GemmConfig_::OutputTile, + // The accumulators. + typename GemmConfig_::Accumulators, + // The global iterator for C. + typename Helper_::GlobalLoadIteratorC, + // The transformer for C. + typename Helper_::GlobalTransformerC, + // The transformer for D. + typename Helper_::GlobalTransformerD, + // The global iterator for D. + typename Helper_::GlobalStoreIteratorD, + // The iterator to store D to shared memory. + typename Helper_::SharedStoreIteratorD, + // The shared store transformer for D. + typename Helper_::SharedStoreTransformerD, + // The iterator to load D from shared memory. + typename Helper_::SharedLoadIteratorD, + // The number of iterations. + typename Helper_::Iterations, + // The strides between iterations. + typename Helper_::Delta, + // The functor to be used in the epilogue. + EpilogueFunctor_, + // The index. + Index_> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_global_stream.h b/cutlass/gemm/gemm_global_stream.h new file mode 100644 index 00000000..194f0dec --- /dev/null +++ b/cutlass/gemm/gemm_global_stream.h @@ -0,0 +1,175 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements efficient loading of the thread block-level tile from global memory and + storing + to shared memory. +*/ +#pragma once + +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The load iterator. + typename LoadIterator_, + /// The store iterator to copy to shared memory. + typename StoreIterator_, + /// The transformer to be applied after the data has been copied from global memory. + typename Transformer_> + +struct GlobalLoadStreamBase { + /// The load iterator. + typedef LoadIterator_ LoadIterator; + /// The transformer. + typedef Transformer_ Transformer; + /// The store iterator to write to shared memory. + typedef StoreIterator_ StoreIterator; + + /// The fragment that is copied from shared memory.
+ typedef typename LoadIterator::Fragment FetchedFragment; + /// The fragment that is obtained after the transformation by the transformer. + typedef typename Transformer::OutputFragment TransformedFragment; + /// Make sure the fragments match. + static_assert((platform::is_same::value), + ""); + /// The output fragment. + typedef TransformedFragment Fragment; + /// Make sure the transformed fragment is the same as the store fragment. + static_assert((platform::is_same::value), + ""); + + /// The layout. + static MatrixLayout::Kind const kLayout = LoadIterator::kLayout; + /// The scalar type of the iterator. + typedef typename LoadIterator::Scalar Scalar; + /// The pointer. + typedef typename LoadIterator::Pointer Pointer; + /// The index. + typedef typename LoadIterator::Index Index; + + /// The params. + struct Params { + // The load iterator. + typename LoadIterator::Params load_iterator; + // The store iterator. + typename StoreIterator::Params store_iterator; + + /// Setup the params. + CUTLASS_HOST_DEVICE int initialize(Pointer pointer, Index ld) { + int error_code = load_iterator.initialize(pointer, ld); + if (error_code) { + return error_code; + } + + return store_iterator.initialize(); + } + }; + + /// The amount of storage in shared memory needed to store the tile. + typedef typename StoreIterator::SharedStorage SharedStoreStorage; + + /// The storage in shared memory needed by that stream. + union SharedStorage { + // The load iterator. + typename LoadIterator::SharedStorage load_iterator; + // The store iterator. + SharedStoreStorage store_iterator; + }; + + /// Ctor. + CUTLASS_DEVICE GlobalLoadStreamBase(Params const& params, + SharedStorage& shared_storage, + Coord<3> const bounds, + Coord<3> const& block) + : load_iterator(params.load_iterator, bounds, block), + transformer(), + store_iterator(params.store_iterator, shared_storage.store_iterator) + + { + fetched_fragment.clear(); + } + + /// Load the data from shared memory to the fetch fragment. 
+ CUTLASS_DEVICE void copy() { iterator_load(load_iterator, fetched_fragment); } + + /// Commit the data. + CUTLASS_DEVICE void commit() { + transformer.transform(fetched_fragment, transformed_fragment); + iterator_store(store_iterator, transformed_fragment); + store_iterator.inc_stage(); + } + + /// Execute the residue code. + CUTLASS_DEVICE void residue(Index k, bool skip_clear = false) { + load_iterator.residue(k); + if (!skip_clear) { + fetched_fragment.clear(); + } + } + + /// The iterator. + LoadIterator load_iterator; + /// The fragment to fetch from shared memory. + FetchedFragment fetched_fragment; + /// The transformer. + Transformer transformer; + /// The fragment to convert the data after it has been fetched from shared memory. + TransformedFragment transformed_fragment; + /// The store iterator. + StoreIterator store_iterator; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The load iterator. + typename LoadIterator_, + /// The store iterator to copy to shared memory. + typename StoreIterator_, + /// The transformer to be applied after the data has been copied from global memory. + typename Transformer_ = Copy > + +struct GlobalLoadStream : public GlobalLoadStreamBase { + /// The base class. + typedef GlobalLoadStreamBase Base; + + /// Ctor. 
+ CUTLASS_DEVICE GlobalLoadStream(typename Base::Params const& params, + typename Base::SharedStorage& shared_storage, + Coord<3> const& bounds, + Coord<3> const& block) + : Base(params, shared_storage, bounds, block) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_global_tile.h b/cutlass/gemm/gemm_global_tile.h new file mode 100644 index 00000000..28bcc6a9 --- /dev/null +++ b/cutlass/gemm/gemm_global_tile.h @@ -0,0 +1,478 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines iterators for efficiently loading and storing to global memory. +*/ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// The following functor reshapes a tile of threads to match a tile of data. The idea is that when +// the user wants to build the iterator traits, he/she may want to specify the tile independently +// from the number of scalars loaded/stored per instruction. For example, in the row-major version +// with a tile of size 128x8 - the user may want the iterator to work with 32x8 threads if +// each thread loads 1 scalar per LDG. If the user changes to 4 scalars per LDG, then the tile of +// threads has to change. The code below detects that and corrects the thread tile automatically - it is +// a helper when the user does not specify the right configuration.
+ +template +struct ReshapeThreads { + typedef Threads_ Threads; +}; + +template +struct ReshapeThreads { + typedef Shape Threads; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmGlobalTileTraits { + /// Identity of the operand + static GemmOperand::Kind const kOperand = kOperand_; + /// The layout. + static MatrixLayout::Kind const kLayout = kLayout_; + /// The scalar, with any const qualifier removed. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The number of scalars per LDG/STG. + static int const kAccessSize = kAccessSize_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kGlobal; + + /// The tile shape, as produced by ReshapeTile. + typedef typename ReshapeTile::Tile Tile; + /// The threads shape, as produced by ReshapeThreads. + typedef typename ReshapeThreads::Threads Threads; + /// The relative offset between two elements in the H/W dimension in adjacent threads. + typedef Shape<1, 1, Tile::kC> ThreadsDelta; + + /// The strides in each dimension between different loads/stores. + typedef Shape<0, Threads::kH, Threads::kW * kAccessSize> Delta; + /// Strides for immediate offset computation + typedef Shape<0, 0, Threads::kW * ThreadsDelta::kW, kAccessSize> ImmediateOffsetStrides; + /// The number of iterations needed to load/store the tile.
+ typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kAccessSize> + Iterations; + + typedef GemmMultiplicandTraits MultiplicandTraits; + + /// Computes the thread offset (0, h, w, 0) from threadIdx.x, Threads::kW and ThreadsDelta. + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH; + int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW; + + return make_Coord(0, thread_offset_h, thread_offset_w, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmGlobalTileCdTraits : public GemmGlobalTileTraits { + /// The base class. + typedef GemmGlobalTileTraits + Base; + + /// The stride in the H dimension. + static int const kStrideH = kStrideH_; + /// Override the strides in each dimension between different loads/stores. + typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> Delta; + + typedef typename Base::Iterations Iterations; + + typedef typename Base::Threads Threads; + + typedef typename Base::ThreadsDelta ThreadsDelta; + + typedef typename Base::ImmediateOffsetStrides ImmediateOffsetStrides; + + /// Computes the thread offset (0, h, w, 0); h is scaled by kStrideH * Iterations::kH here. + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int thread_offset_h = threadIdx.x / Threads::kW * kStrideH * Iterations::kH; + int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW; + + return make_Coord(0, thread_offset_h, thread_offset_w, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmGlobalIteratorAb + : public TileLoadIterator { + /// This class. + typedef GemmGlobalIteratorAb This_; /// The base class. + + typedef TileLoadIterator + Base; + /// The layout.
+ static MatrixLayout::Kind const kLayout = TileTraits_::kLayout; + /// Fragment type loaded by the iterator + typedef typename Base::Fragment Fragment; + /// The scalar. + typedef typename TileTraits_::Scalar Scalar; + /// The threads. + typedef typename TileTraits_::Threads Threads; + /// The index. + typedef Index_ Index; + /// The functor that computes the thread offset. + typedef typename TileTraits_::ThreadOffset ThreadOffset; + /// Specifies in which dimension post-increment accesses advance. + static IteratorAdvance::Kind const kAdvance = Base::kAdvance; + + typedef cutlass::PredicateVector::kCount> PredicateVector; + + /// Iterator parameters type + typedef typename Base::Params BaseParams; + + struct Params : public BaseParams { + /// Initializes params to load a strip-mined tile, given pointer and stride_h. Always returns 0. + CUTLASS_HOST_DEVICE int initialize(Scalar const* ptr, Index stride_h) { + Index inc_d = 0; + Index inc_advance = 0; + // Move by some columns for each iteration in the H dimension. + Index inc_h = Base::Delta::kH * stride_h; + + // Move by some more columns in the number of iterations if the D dimension is > 1. + if (Base::Delta::kD > 0) { + inc_d = Base::Delta::kD * stride_h - (Base::Iterations::kH - 1) * inc_h; + } + + // Move to the beginning of the next iteration.
+ if (kAdvance == IteratorAdvance::kH && Base::Delta::kD > 0) { + inc_advance = inc_d; + } else if (kAdvance == IteratorAdvance::kH) { + inc_advance = inc_h; + } else if (Base::Delta::kD > 0) { + inc_advance = (Base::Iterations::kW + 0) * ShapeCount::kWc - + (Base::Iterations::kH - 1) * inc_h - + (Base::Iterations::kD - 1) * Base::Delta::kD * stride_h; + } else { + inc_advance = (Base::Iterations::kW + 0) * ShapeCount::kWc - + (Base::Iterations::kH - 1) * inc_h; + } + + Base::Params::initialize(ptr, 0, stride_h, 0, inc_d, inc_h, 0, inc_advance); + return 0; + } + }; + + /// Offset of an individual lane from the start of the tile + Coord<4> thread_offset; + /// The parameters + Params params; + + CUTLASS_DEVICE void initialize_predicates(const Coord<3>& bounds, const Coord<3>& block) { + // Clear every predicate, then set only the in-bounds accesses below. + predicates.fill(0); + + int bounds_h, bounds_w; + if (kAdvance == IteratorAdvance::kH) { + bounds_w = bounds[2] - block[2]; + bounds_h = bounds[1]; + + } else { + bounds_w = bounds[1]; + bounds_h = bounds[2] - block[1]; + } + + // Fill in the bits of the predicate vector. + for (int d = 0; d < Base::Iterations::kD; ++d) { + for (int h = 0; h < Base::Iterations::kH; ++h) { + for (int w = 0; w < Base::Iterations::kW; ++w) { + for (int c = 0; c < Base::Iterations::kC; ++c) { + bool flag = w * Base::Delta::kW < bounds_w; + if (kAdvance == IteratorAdvance::kH) { + flag = flag && (h * Base::Delta::kH + d * Base::Delta::kD) < bounds_h; + } else { + flag = flag && (h * Base::Delta::kH) < bounds_h; + } + int const bit = ComputeOffsetFromShape::get(d, h, w, c); + predicates.set(bit, flag); + } + } + } + } + } + + /// Ctor. Applies the thread and block offsets to the pointer and initializes the predicates. + CUTLASS_DEVICE GemmGlobalIteratorAb(Params const& _params, + const Coord<3>& bounds, + const Coord<3>& block, + ThreadOffset thread_offset_func = ThreadOffset()) + : params(_params) { + thread_offset = thread_offset_func(); + // The column. + Index block_h = thread_offset[1]; + // The contiguous dimension.
+ Index block_w = thread_offset[2]; + + // Add the block indices (swapped when the iterator does not advance along H). + if (kAdvance == IteratorAdvance::kH) { + block_h += block[1]; + block_w += block[2]; + + } else { + block_h += block[2]; + block_w += block[1]; + } + + // Setup the pointer. + params.pointer += (block_h * params.stride_h + block_w); + + // Initialize predicates + initialize_predicates(bounds, make_Coord(0, block_h, block_w)); + } + + /// Increment the pointer in the H dimension. + CUTLASS_DEVICE void inc_h() { params.pointer += params.inc_h; } + /// Increment the pointer in the D dimension. + CUTLASS_DEVICE void inc_d() { params.pointer += params.inc_d; } + /// Increment the pointer to move to the next iteration. + CUTLASS_DEVICE void inc_advance() { params.pointer += params.inc_advance; } + + /// Returns the current pointer + CUTLASS_HOST_DEVICE + Scalar const* data() const { return params.pointer; } + + /// That's the residue! Clear each predicate whose H (if advancing in H) or W offset is >= k. + CUTLASS_DEVICE void residue(Index k) { + // The coordinates of the thread. + Index block_h = thread_offset[1]; + // The contiguous dimension. + Index block_w = thread_offset[2]; + + // Update the predicate vector. + for (int d = 0; d < Base::Iterations::kD; ++d) { + for (int h = 0; h < Base::Iterations::kH; ++h) { + for (int w = 0; w < Base::Iterations::kW; ++w) { + for (int c = 0; c < Base::Iterations::kC; ++c) { + Index offset = 0; + if (kAdvance == IteratorAdvance::kH) { + offset += block_h + h * Base::Delta::kH + d * Base::Delta::kD; + } else { + offset += block_w + w * Base::Delta::kW; + } + + int const bit = ComputeOffsetFromShape::get(d, h, w, c); + if (offset >= k) { + predicates.set(bit, false); + } + } + } + } + } + } + + /// Is the access at (d, h, w, c) enabled by its predicate? + CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { + int const bit = ComputeOffsetFromShape::get(d, h, w, c); + return predicates[bit]; + } + + /// The predicates.
+ PredicateVector predicates; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmGlobalIteratorCd : public TileIteratorBase { + /// This class. + typedef GemmGlobalIteratorCd This_; + /// The base class. + typedef TileIteratorBase + Base; + + /// The layout. + static MatrixLayout::Kind const kLayout = TileTraits_::kLayout; + + /// The scalar. + typedef typename TileTraits_::Scalar Scalar; + /// The pointer. + typedef typename TileTraits_::Pointer Pointer; + /// The threads. + typedef typename TileTraits_::Threads Threads; + /// The index. + typedef Index_ Index; + /// The functor that computes the thread offset. + typedef typename TileTraits_::ThreadOffset ThreadOffset; + + /// The params. + struct Params { + /// The pointer. + Pointer pointer; + /// The stride in the H dimension to setup the thread in the block. + Index stride_h; + /// The strides to increment the pointer. + Index inc_advance, inc_h; + /// The amounts subtracted from the predicate offset by inc_advance() and inc_h(). + Index predicate_inc_advance, predicate_inc_h; + /// The column offset to compute the predicate for the columns. + Index predicate_offset; + + /// Setup the params. Always returns 0. + CUTLASS_HOST_DEVICE int initialize( + Pointer pointer, Index ld, Index bound, Index epilogue_stride_w, Index epilogue_delta_w) { + // The pointer. + this->pointer = pointer; + // Each column of the matrix. + stride_h = TileTraits_::ThreadsDelta::kH * ld; + // Each thread outputs 1 column per iteration. The stride between columns is given by the + // number of scalars that are loaded per LDS for B.
+ inc_h = ld * TileTraits_::kStrideH; + inc_advance = + (ld - ld * TileTraits_::kStrideH * (Base::Iterations::kH - 1)) + epilogue_stride_w; + + predicate_offset = bound; + predicate_inc_h = TileTraits_::kStrideH; + predicate_inc_advance = + -((TileTraits_::kStrideH * (Base::Iterations::kH - 1) - 1) + epilogue_delta_w); + + return 0; + } + }; + + Params params; + /// Offset of an individual lane from the start of the tile + Coord<4> thread_offset; + + /// Default ctor (empty body). + CUTLASS_DEVICE GemmGlobalIteratorCd() {} + + /// Ctor. Offsets the pointer, sets one predicate per W iteration and adjusts the predicate offset. + CUTLASS_DEVICE GemmGlobalIteratorCd(Params const& params, + const Coord<3>& bounds, + const Coord<3>& block, + int offset = 0, + int pred_offset = 0, + ThreadOffset thread_offset_func = ThreadOffset()) + : params(params) { + thread_offset = thread_offset_func(); + // Each warp works on a different column of the tile. + int const h = thread_offset[1] + block[1]; + // Each lane writes a different element. + int const w = thread_offset[2] + block[2]; + // Setup the pointer. + this->params.pointer += ((h * params.stride_h + w) + offset); + + // Prepare the vector of predicates. + for (int i = 0; i < Base::Iterations::kW; ++i) { + predicates.set(i, w + i * Base::Delta::kW < bounds[2]); + } + this->params.predicate_offset -= (h + pred_offset); + } + + /// Increment the pointer in the C dimension (no-op). + CUTLASS_DEVICE void inc_c() {} + /// Increment the pointer in the W dimension (no-op). + CUTLASS_DEVICE void inc_w() {} + /// Increment the pointer in the H dimension and update the predicate offset. + CUTLASS_DEVICE void inc_h() { + params.pointer += params.inc_h; + params.predicate_offset -= params.predicate_inc_h; + } + /// Increment the pointer in the D dimension (no-op). + CUTLASS_DEVICE void inc_d() {} + /// Increment the pointer to move to the next iteration and update the predicate offset. + CUTLASS_DEVICE void inc_advance() { + params.pointer += params.inc_advance; + this->params.predicate_offset -= params.predicate_inc_advance; + } + + /// Test the validity: the W predicate must be set and predicate_offset must be positive.
+ CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { + return predicates.at(w) && params.predicate_offset > 0; + } + + /// Returns the raw pointer + CUTLASS_HOST_DEVICE + Pointer data() { return params.pointer; } + + CUTLASS_HOST_DEVICE + Pointer const data() const { return params.pointer; } + + /// The predicates, one bit per W iteration. + cutlass::PredicateVector predicates; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_operand.h b/cutlass/gemm/gemm_operand.h new file mode 100644 index 00000000..737f993f --- /dev/null +++ b/cutlass/gemm/gemm_operand.h @@ -0,0 +1,141 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear + memory. +*/ +#pragma once + +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to describe attributes of GEMM matrix operands: Congruous is (operand is A) XOR (layout is row-major). +template +struct GemmOperandTraitsAb { + static const bool Congruous = + (kOperand_ == GemmOperand::kA ^ kLayout_ == MatrixLayout::kRowMajor); +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GetExtent; + +template +struct GetExtent { + static const int kExtent = Tile_::kW; +}; + +template +struct GetExtent { + static const int kExtent = Tile_::kH; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Determines the shape of a multiplicand tile in terms of strided (H) and contiguous (W) +/// dimensions +template +struct GemmMultiplicandTraits { + // Only defined for A or B + static_assert(Usage == GemmOperand::kA || Usage == GemmOperand::kB, + "MultiplicandTileShape defined only for A or B operands."); + + /// Shape of GEMM thread block tile (K, N, M) + typedef ThreadBlockTile_ ThreadBlockTile; + + /// Identifies multiplicand + static
GemmOperand::Kind const kUsage = Usage; + + /// Layout of tile + static MatrixLayout::Kind const kLayout = Layout; + + // True if K is the strided dimension + static bool const kKstrided = (kUsage == GemmOperand::kA ^ kLayout == MatrixLayout::kRowMajor); + + /// Map the ThreadBlockShape onto (kH, kW) dimensions for A and B operand + typedef typename platform::conditional< + kKstrided, + Shape<1, ThreadBlockTile::kD, GetExtent::kExtent>, + Shape<1, GetExtent::kExtent, ThreadBlockTile::kD> >::type Shape; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Projects a coordinate (K, N, M) onto inner and outer dimensions defined for each +/// operand. +template +struct ProjectOperand; + +/// Project A operand - (0, K, M) if Kstrided, (0, M, K) otherwise +template +struct ProjectOperand { + CUTLASS_HOST_DEVICE + static Coord<3> project(Coord<3> const &coord) { + if (Kstrided) { + return make_Coord(0, coord[0], coord[2]); + } else { + return make_Coord(0, coord[2], coord[0]); + } + } +}; + +/// Project B operand - (0, K, N) if Kstrided, (0, N, K) otherwise +template +struct ProjectOperand { + CUTLASS_HOST_DEVICE + static Coord<3> project(Coord<3> const &coord) { + if (Kstrided) { + return make_Coord(0, coord[0], coord[1]); + } else { + return make_Coord(0, coord[1], coord[0]); + } + } +}; + +/// Project C operand - (0, N, M) +template <> +struct ProjectOperand { + CUTLASS_HOST_DEVICE + static Coord<3> project(Coord<3> const &coord) { return make_Coord(0, coord[1], coord[2]); } +}; + +/// Project D operand - (0, N, M) +template <> +struct ProjectOperand { + CUTLASS_HOST_DEVICE + static Coord<3> project(Coord<3> const &coord) { return make_Coord(0, coord[1], coord[2]); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_shared_stream.h b/cutlass/gemm/gemm_shared_stream.h new file mode 100644 index 00000000..c6ff7bd9 --- /dev/null +++
b/cutlass/gemm/gemm_shared_stream.h @@ -0,0 +1,113 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines abstractions for managing loading and storing fragments to shared memory in the + efficient GEMM pipeline. 
+*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The load iterator. + typename Iterator_, + /// The transformer to be applied after the data has been copied from shared memory. + typename Transformer_ = Copy > + +struct SharedLoadStream { + /// The load iterator. + typedef Iterator_ Iterator; + /// The transformer. + typedef Transformer_ Transformer; + + /// The fragment that is copied from shared memory. + typedef typename Iterator::Fragment FetchedFragment; + /// The fragment that is obtained after the transformation by the transformer. + typedef typename Transformer::OutputFragment TransformedFragment; + /// Make sure the fragments match. + static_assert((platform::is_same::value), + ""); + /// The output fragment. + typedef TransformedFragment Fragment; + + /// The params. + struct Params { + /// The iterator params. + typename Iterator::Params iterator; + + /// Setup the params; returns the result of the iterator's initialize(). + CUTLASS_HOST_DEVICE int initialize() { return iterator.initialize(); } + }; + + /// The storage in shared memory needed by that stream. + typedef typename Iterator::Storage SharedStorage; + + /// Default ctor (empty body). + CUTLASS_DEVICE SharedLoadStream() {} + + /// Ctor. Delegates to initialize(). + CUTLASS_DEVICE SharedLoadStream(Params const &params, SharedStorage &shared_storage) { + this->initialize(params, shared_storage); + } + + /// Initialize the stream: rebuild the iterator from params and shared storage, reset the transformer. + CUTLASS_DEVICE void initialize(Params const &params, SharedStorage &shared_storage) { + // The iterator. + iterator = Iterator(params.iterator, shared_storage); + // The transformer. + transformer = Transformer(); + } + + /// Load the data from shared memory to the fetch fragment. + CUTLASS_DEVICE void copy(FetchedFragment &fetched) { shared_iterator_load(iterator, fetched); } + + /// Load the data from shared memory to the fetch fragment, passing d through to the load.
+ CUTLASS_DEVICE void copy(int d, FetchedFragment &fetched) { + shared_iterator_load(iterator, fetched, d); + } + + /// Commit the data: run the transformer from fetched into transformed. + CUTLASS_DEVICE void commit(FetchedFragment &fetched, TransformedFragment &transformed) { + transformer.transform(fetched, transformed); + } + + /// Increment the stage of the iterator. + CUTLASS_DEVICE void inc_stage() { iterator.inc_stage(); } + + /// The iterator. + Iterator iterator; + /// The transformer. + Transformer transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_shared_tile.h b/cutlass/gemm/gemm_shared_tile.h new file mode 100644 index 00000000..9ec4c9a2 --- /dev/null +++ b/cutlass/gemm/gemm_shared_tile.h @@ -0,0 +1,406 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines iterators for efficiently loading and storing tiles to and from shared memory. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedStoreTileAbTraits { + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The tile. + typedef typename ReshapeTile::Tile Tile; + /// The threads. + typedef Threads_ Threads; + /// The strides to compute the base position of the thread. + typedef Shape<0, ShapeCount::kWc, Tile::kC, kScalarsPerSts_> ThreadsStrides; + /// The skew. + static int const kSkew = 0; + /// The number of scalars per LDG/STG. + static int const kAccessSize = kScalarsPerSts_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of iterations needed to load/store the tile. + typedef Shape<1, + Tile::kH / Threads::kH, + Tile::kW / Threads::kW, + Tile::kC / Threads::kC / kAccessSize> + Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, Threads::kH * ShapeCount::kWc, Threads::kW * kAccessSize> Delta; + /// The strides in each dimension between different loads/stores. 
+ typedef Shape<0, Threads::kH * ShapeCount::kWc, Threads::kW * kAccessSize> + ImmediateOffsetStrides; + + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int offset = ComputeThreadOffsetFromStrides::get(); + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedStoreWithSkewTileAbTraits { + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The tile without skews. + typedef typename ReshapeTile::Tile TileWithoutSkew; + /// The tile. + typedef typename ReshapeTile, + kScalarsPerSts_>::Tile Tile; + /// The threads. + typedef Threads_ Threads; + /// The skew. + static int const kSkew = kSkew_; + /// The number of scalars per STS. + static int const kAccessSize = kScalarsPerSts_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of iterations needed to load/store the tile. + typedef Shape<1, TileWithoutSkew::kH / Threads::kW, TileWithoutSkew::kW / Threads::kH> Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, ShapeCount::kWc, Threads::kH * kAccessSize> Delta; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, ShapeCount::kWc, Threads::kH * kAccessSize> ImmediateOffsetStrides; + + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int offset = ComputeThreadOffsetFromStrides::get(); + return make_Coord(0, 0, offset, 0); + } + }; + + protected: + /// The strides to compute the base position of the thread. 
+ typedef Shape<0, kScalarsPerSts_, ShapeCount::kHwc / Threads::kW> ThreadsStrides; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedLoadTileATraits { + static GemmOperand::Kind const kOperand = GemmOperand::kA; + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The tile without skew. + typedef Shape::kExtent * InstructionShape_::kD> + TileWithoutSkew_; + /// The tile with skew. + typedef Shape TileWithSkew; + /// The tile without skew after reshaping. + typedef typename ReshapeTile::Tile TileWithoutSkew; + /// The tile. + typedef typename ReshapeTile::Tile Tile; + /// The number of warps. + typedef Warps_ Warps; + /// The threads in a warp. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of scalars per LDS. + // static int const kScalarsPerLds = kScalarsPerLds_; + static int const kAccessSize = kScalarsPerLds_; + /// The skew. + static int const kSkew = kSkew_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of warps. + static int const kWarps = GetExtent::kExtent; + /// The number of threads in one dimension of the warp. + static int const kThreadsPerWarp = GetExtent::kExtent; + + /// The number of iterations needed to load/store the tile. + typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp /* / kScalarsPerLds*/> + Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape Delta; + /// The strides in each dimension between different loads/stores. + typedef Shape + ImmediateOffsetStrides; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // Extract the warp.
+ int const warp = threadIdx.x / kWarpSize % Warps::kW; + // Compute the row offset for each thread + int const lane = (threadIdx.x & 0x0e) / 2; + // The offset. + int const offset = (warp * ThreadsPerWarp::kW + lane) * kAccessSize; + + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedLoadTileBTraits { + static GemmOperand::Kind const kOperand = GemmOperand::kB; + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The tile without skew. + typedef Shape::kExtent * InstructionShape_::kD> + TileWithoutSkew_; + /// The tile with skew. + typedef Shape TileWithSkew; + /// The tile without skew after reshaping. + typedef typename ReshapeTile::Tile TileWithoutSkew; + /// The tile. + typedef typename ReshapeTile::Tile Tile; + /// The number of warps. + typedef Warps_ Warps; + /// The threads in a warp. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of scalars per LDS. + static int const kAccessSize = kScalarsPerLds_; + /// The skew. + static int const kSkew = kSkew_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of warps. + static int const kWarps = GetExtent::kExtent; + /// The number of threads in one dimension of the warp. + static int const kThreadsPerWarp = GetExtent::kExtent; + + /// The number of iterations needed to load/store the tile. + typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp /* / kAccessSize*/> Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape Delta; + /// The strides in each dimension between different loads/stores.
+ typedef Shape + ImmediateOffsetStrides; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // The position of the warp. + int const warp = threadIdx.x / (Warps::kW * kWarpSize); + + // Compute the column offset for each thread + int const lane = (threadIdx.x & 0x10) / 8 + (threadIdx.x & 0x01); + // The offset. + int const offset = (warp * ThreadsPerWarp::kH + lane) * kAccessSize; + + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedStoreTileDTraits { + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The dimension of the output tile. + typedef OutputTile_ OutputTile; + /// The warps in the tile. + typedef Warps_ Warps; + /// The threads in the warps. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of scalars per STS. + static int const kAccessSize = kScalarsPerSts_; + /// The skew. + static int const kSkew = kSkew_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of scalars per thread. + static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW; + /// The number of threads. + static int const kThreads = ShapeCount::kCount * kWarpSize; + /// The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). + static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew; + + /// The tile. + typedef Shape<1, 2, kScalarsPerRow / kAccessSize, kAccessSize> Tile; + /// The number of iterations needed to store the tile. + typedef Shape<1, 1, kScalarsPerThread / kAccessSize> Iterations; + /// The strides in each dimension between different loads/stores.
+ typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kAccessSize> Delta; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kAccessSize> ImmediateOffsetStrides; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // We issue STS.128 in the epilogue to store the accumulators to shared memory. When we use + // STS.128, we have to guarantee that threads in groups of 8 do not have bank conflicts (i.e + // they write to different banks). + + // Odd threads go to the second half of shared memory. + int const row = threadIdx.x & 0x01; + + int const warp_id = (threadIdx.x >> 5); + + int const warp_row = (warp_id % Warps::kW); + int const warp_col = (warp_id / Warps::kW); + + int hi_halfwarp_offset = OutputTile::kW * ((threadIdx.x >> 4) & 1); + int lo_halfwarp_offset = (((threadIdx.x >> 1) & 0x7) + warp_row * ThreadsPerWarp::kW); + + int col = kAccessSize * lo_halfwarp_offset + + warp_col * (ThreadsPerWarp::kH / 2) * OutputTile::kW + hi_halfwarp_offset; + + int offset = row * kScalarsPerRow + col; + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedLoadTileDTraits { + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The dimension of the output tile. + typedef OutputTile_ OutputTile; + /// The warps in the tile. + typedef Warps_ Warps; + /// The threads in the warps. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of scalars per LDS. + static int const kAccessSize = kScalarsPerLds_; + /// The skew. + static int const kSkew = kSkew_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of scalars per thread.
+ static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW; + /// The number of threads. + static int const kThreads = ShapeCount::kCount * kWarpSize; + /// The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). + static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew; + + /// The tile. + typedef Shape<1, 2, kScalarsPerRow / kAccessSize, kAccessSize> Tile; + + // Compute the number of iterations per warp in the Tile::kH dimension. + static int const kIterationsInHPerWarp = kTileH_ / ShapeCount::kCount; + + // As shown above, the shared memory tile is composed of 2 rows and each row is made of + // kScalarsPerRow. A warp is expected to read from the 1st row, then move to the 2nd row and go + // back to the 1st row. To model that scheme we define the Iterations shape as Shape. + // However, in some cases, we have only 1 iteration per warp. In that case, we must define the + // shape as Shape<1, 1, ...>. The following code does that. + static int const kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2; + // As soon as we know kIterationsH, it is trivial to compute kIterationsD: + static int const kIterationsD = kIterationsInHPerWarp / kIterationsH; + + /// The number of iterations needed to load the tile. + typedef Shape Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape Delta; + /// The strides in each dimension between different loads/stores. + typedef Shape ImmediateOffsetStrides; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // Each warp works on a different column. + int const h = threadIdx.x / kWarpSize; + // Compute the row.
+ int const w = (threadIdx.x & (kWarpSize - 1)) * kAccessSize; + int offset = 0; + if (Iterations::kH == 1) { + int const row = h & 0x1; + int const col = h / 2; + offset = row * ShapeCount::kWc + col * OutputTile::kW * Iterations::kD + w; + } else { + offset = h * OutputTile::kW * Iterations::kD + w; + } + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_traits.h b/cutlass/gemm/gemm_traits.h new file mode 100644 index 00000000..7a77d4b0 --- /dev/null +++ b/cutlass/gemm/gemm_traits.h @@ -0,0 +1,747 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of complete GEMM computation. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The scalar type for A. + typename ScalarA_, + /// The scalar type for B. + typename ScalarB_, + /// The scalar type for C. + typename ScalarC_, + /// The scalar type for D. + typename ScalarD_, + /// The output tile size for the GEMM KxNxM. + typename OutputTile_, + /// The functor to do the math. + typename MultiplyAdd_, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_, + /// The number of scalars per STS for A. + int kScalarsPerStsA_, + /// The number of scalars per LDS for A. + int kScalarsPerLdsA_, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_, + /// The number of scalars per STS for B. + int kScalarsPerStsB_, + /// The number of scalars per LDS for B. + int kScalarsPerLdsB_, + /// The number of scalars per LDG for C and STG for D. + int kScalarsPerLdgCAndStgD_, + /// The number of scalars per STS for D. + int kScalarsPerStsD_, + /// The number of scalars per LDS for D.
+ int kScalarsPerLdsD_, + /// The number of stages in shared memory to do single/double/triple-buffering. + int kStages_> + +struct GemmConfig { + // + /// The scalar for A. + typedef ScalarA_ ScalarA; + /// The scalar for B. + typedef ScalarB_ ScalarB; + /// The scalar for C. + typedef ScalarC_ ScalarC; + /// The scalar for D. + typedef ScalarD_ ScalarD; + + /// The tile. + typedef OutputTile_ OutputTile; + /// The functor to do D = A*B + C. + typedef MultiplyAdd_ MultiplyAdd; + /// The shape of the instruction. + typedef typename MultiplyAdd::InstructionShape InstructionShape; + /// The number of accumulators per warp. + typedef typename MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp; + /// The accumulators. + typedef typename MultiplyAdd::Accumulators Accumulators; + + /// The number of warps. + typedef typename ShapeDiv::Shape Warps; + /// The default warp size (32 threads per warp). + static int const kWarpSize = cutlass::kWarpSize; + /// The number of threads. + static int const kThreads = ShapeCount::kCount * kWarpSize; + + /// The number of scalars per LDG/STS/LDS for A. + static int const kScalarsPerLdgA = kScalarsPerLdgA_; + static int const kScalarsPerStsA = kScalarsPerStsA_; + static int const kScalarsPerLdsA = kScalarsPerLdsA_; + + /// The number of scalars per LDG/STS/LDS for B. + static int const kScalarsPerLdgB = kScalarsPerLdgB_; + static int const kScalarsPerStsB = kScalarsPerStsB_; + static int const kScalarsPerLdsB = kScalarsPerLdsB_; + + /// The number of scalars per LDG for C. + static int const kScalarsPerLdgC = kScalarsPerLdgCAndStgD_; + + /// The number of scalars per STS/LDS/STG for D. + static int const kScalarsPerStgD = kScalarsPerLdgCAndStgD_; + static int const kScalarsPerStsD = kScalarsPerStsD_; + static int const kScalarsPerLdsD = kScalarsPerLdsD_; + + /// The number of accumulators that are going to be fed from one LDS A/B.
+ static int const kAccumulatorsPerLdsA = kScalarsPerLdsA / InstructionShape::kD; + static int const kAccumulatorsPerLdsB = kScalarsPerLdsB / InstructionShape::kD; + + /// The number of stages in shared memory to implement double, triple, more-buffering. + static int const kStages = kStages_; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperA {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperA { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarA Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar; + + /// The traits class to build the iterator to load data from global memory for A^N. + typedef GemmGlobalTileTraits< + // That's A. + GemmOperand::kA, + // A is column-major. + MatrixLayout::kColumnMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxM in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgA> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for A^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer is float. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Shape, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). 
+ GemmConfig_::kScalarsPerStsA> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for A^N. + typedef GemmSharedLoadTileATraits< + // The pointer is float const. + MultiplyAddScalar const, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction. + typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsA, + // The skew. + 0> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperA { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarA Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar; + + /// The traits class to build the iterator to load data from global memory for A^T. + typedef GemmGlobalTileTraits< + // That's A. + GemmOperand::kA, + // A is row-major. + MatrixLayout::kRowMajor, + // The pointer is float const. + Scalar const, + // The tile has size MxK in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgA> + GlobalTileTraits; + + /// The number of scalars in 4B. + static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 
1 : 4 / sizeof(MultiplyAddScalar); + /// The traits class to build the iterator to store data to shared memory for A^T. + typedef GemmSharedStoreWithSkewTileAbTraits< + // The pointer is float. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Shape, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS. + GemmConfig_::kScalarsPerStsA, + // The skew to avoid bank conflicts added in the tile W dimension. + 128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsA / + GlobalTileTraits::Threads::kW * kScalarsIn4B> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for A^T. + typedef GemmSharedLoadTileATraits< + // The pointer is float const. + MultiplyAddScalar const, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction. + typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsA, + // The skew. + SharedStoreTileTraits::kSkew> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperB {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperB { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarB Scalar; + /// The scalar stored in shared memory. 
+ typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar; + + /// The traits class to build the iterator to load data from global memory for B^N. + typedef GemmGlobalTileTraits< + // That's B. + GemmOperand::kB, + // B is column-major. + MatrixLayout::kColumnMajor, + // The pointer is float const. + Scalar const, + // The tile has size NxK in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgB> + GlobalTileTraits; + + /// The number of scalars in 4B. + static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar); + /// The traits class to build the iterator to store data to shared memory for B^N. + typedef GemmSharedStoreWithSkewTileAbTraits< + // The pointer is float. + MultiplyAddScalar, + // The tile has size KxN in GEMM's terminology. + Shape, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS. + GemmConfig_::kScalarsPerStsB, + // The skew to avoid bank conflicts added in the tile W dimension. + 128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsB / + GlobalTileTraits::Threads::kW * kScalarsIn4B> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for B^N. + typedef GemmSharedLoadTileBTraits< + // The pointer is float const. + MultiplyAddScalar const, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction.
+ typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsB, + // The skew. + SharedStoreTileTraits::kSkew> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperB { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarB Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar; + + /// The traits class to build the iterator to load data from global memory for B^T. + typedef GemmGlobalTileTraits< + // That's B. + GemmOperand::kB, + // B is row-major. + MatrixLayout::kRowMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxN in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgB> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for B^T. + typedef GemmSharedStoreTileAbTraits< + // The pointer is float. + MultiplyAddScalar, + // The tile has size KxN in GEMM's terminology. + Shape, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsB> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for B^T. + typedef GemmSharedLoadTileBTraits< + // The pointer is float const. + MultiplyAddScalar const, + // The output tile size. 
+ typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction. + typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsB, + // The skew. + 0> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The GEMM configuration. + typename GemmConfig_, + /// The stream to load A from global memory to shared memory. + typename GlobalLoadStreamA_, + /// The stream to load B from global memory to shared memory. + typename GlobalLoadStreamB_, + /// The stream to load A from shared memory. + typename SharedLoadStreamA_, + /// The stream to load B from shared memory. + typename SharedLoadStreamB_, + /// The epilogue. + typename Epilogue_, + /// The block swizzle to reorganize the grid. + typename BlockSwizzle_ = IdentityBlockSwizzle, + /// The index. + typename Index_ = int, + /// The tool used to clear accumulators. + typename ClearAccumulators_ = ClearAccumulators > + +struct GemmTraits { + /// The configuration. + typedef GemmConfig_ GemmConfig; + /// The output tile. + typedef typename GemmConfig::OutputTile OutputTile; + + /// The stream to load A from global memory to shared memory. + typedef GlobalLoadStreamA_ GlobalLoadStreamA; + /// The layout of A. + static MatrixLayout::Kind const kLayoutA = GlobalLoadStreamA::kLayout; + /// The scalar for A. + typedef typename GlobalLoadStreamA_::Scalar ScalarA; + + /// The stream to load B from global memory to shared memory. + typedef GlobalLoadStreamB_ GlobalLoadStreamB; + /// The layout of B. + static MatrixLayout::Kind const kLayoutB = GlobalLoadStreamB::kLayout; + /// The scalar for B. 
+ typedef typename GlobalLoadStreamB_::Scalar ScalarB; + + /// The stream to load A from shared memory. + typedef SharedLoadStreamA_ SharedLoadStreamA; + /// The stream to load B from shared memory. + typedef SharedLoadStreamB_ SharedLoadStreamB; + + /// The shared storage for A. + typedef typename GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA; + // Make sure the store-side and load-side views of the storage for A have the same size. + static_assert(sizeof(SharedStoreStorageA) == sizeof(typename SharedLoadStreamA::SharedStorage), + ""); + + /// The shared storage for B. + typedef typename GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB; + // Make sure the store-side and load-side views of the storage for B have the same size. + static_assert(sizeof(SharedStoreStorageB) == sizeof(typename SharedLoadStreamB::SharedStorage), + ""); + + /// The multiply-add functor. + typedef typename GemmConfig::MultiplyAdd MultiplyAdd; + /// The epilogue. + typedef Epilogue_ Epilogue; + /// The scalars in the epilogue. + typedef typename Epilogue::ScalarC ScalarC; + typedef typename Epilogue::ScalarD ScalarD; + + /// The block swizzle to reorganize the grid. + typedef BlockSwizzle_ BlockSwizzle; + /// The index. + typedef Index_ Index; + /// Clear the accumulators. + typedef ClearAccumulators_ ClearAccumulators; + + /// The params. + struct Params { + /// The dimensions of the GEMM. + Index m, n, k; + /// The params for the A stream. + typename GlobalLoadStreamA::Params global_stream_a; + /// The params for the B stream. + typename GlobalLoadStreamB::Params global_stream_b; + /// The params for the A stream from shared memory. + typename SharedLoadStreamA::Params shared_stream_a; + /// The params for the B stream from shared memory. + typename SharedLoadStreamB::Params shared_stream_b; + /// The params for the epilogue. + typename Epilogue::Params epilogue; + + /// Initialize the parameters. + template + CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) { + // Set the problem size.
+ this->m = desc.m; + this->n = desc.n; + this->k = desc.k; + + // Initialize the iterator for A. + int error_code = + global_stream_a.initialize(reinterpret_cast(desc.d_a), desc.lda); + + if (error_code) { + return error_code; + } + + // Initialize the iterator for B. + error_code = global_stream_b.initialize(reinterpret_cast(desc.d_b), desc.ldb); + + if (error_code) { + return error_code; + } + + // The epilogue. + return epilogue.initialize(desc); + } + }; + + // The storage for one operand stream (instantiated for both A and B below). + template + union StreamSharedStorage { + // The storage needed by the global stream. + typename GlobalLoadStream_::SharedStorage global; + // The storage needed by the shared stream. + typename SharedLoadStream_::SharedStorage shared; + }; + + // The storage for the main loop + prologue. + struct MainLoopSharedStorage { + // The storage to shuffle the A matrix in shared memory. + StreamSharedStorage stream_a; + // The storage to shuffle the B matrix in shared memory. + StreamSharedStorage stream_b; + // The storage to clear the accumulators if needed. + typename ClearAccumulators::SharedStorage clear; + }; + + /// The storage in shared memory. + union SharedStorage { + // The storage for the main loop. + MainLoopSharedStorage main_loop; + // The storage for the epilogue. + typename Epilogue::SharedStorage epilogue; + }; + + /// Assemble the global load streams for A/B. + struct GlobalLoadStream { + /// Ctor. + CUTLASS_DEVICE GlobalLoadStream(Params const& params, + SharedStorage& shared_storage, + dim3 const& block) + : stream_a(params.global_stream_a, + shared_storage.main_loop.stream_a.global, + cutlass::make_Coord(0, params.k, params.m), + cutlass::make_Coord(0, 0, block.x)), + stream_b(params.global_stream_b, + shared_storage.main_loop.stream_b.global, + cutlass::make_Coord(0, params.k, params.n), + make_Coord(0, 0, block.y)) {} + + /// Trigger the copies from global memory for the A and B streams.
+ CUTLASS_DEVICE void copy() { + stream_a.copy(); + stream_b.copy(); + } + + /// Commit the data. + CUTLASS_DEVICE void commit() { + stream_a.commit(); + stream_b.commit(); + } + + /// Execute the residue code. + CUTLASS_DEVICE void residue(Index k, bool skip_clear = false) { + stream_a.residue(k, skip_clear); + stream_b.residue(k, skip_clear); + } + + /// The stream for A. + GlobalLoadStreamA stream_a; + /// The stream for B. + GlobalLoadStreamB stream_b; + }; + + /// Assemble the shared load stream for A/B. + struct SharedLoadStream { + /// Ctor. + CUTLASS_DEVICE SharedLoadStream(Params const& params, SharedStorage& shared_storage) { + stream_a.initialize(params.shared_stream_a, shared_storage.main_loop.stream_a.shared); + stream_b.initialize(params.shared_stream_b, shared_storage.main_loop.stream_b.shared); + } + + /// Trigger the copies from shared memory to registers. + CUTLASS_DEVICE void copy(int step) { + stream_a.copy(step, fetched_a[step % 2]); + stream_b.copy(step, fetched_b[step % 2]); + } + + /// Commit the data. + CUTLASS_DEVICE void commit(int step) { + stream_a.commit(fetched_a[step % 2], transformed_a[step % 2]); + stream_b.commit(fetched_b[step % 2], transformed_b[step % 2]); + } + + /// The fragment A. + CUTLASS_DEVICE typename SharedLoadStreamA::Fragment const& fragment_a(int step) const { + return transformed_a[step % 2]; + } + + /// The fragment B. + CUTLASS_DEVICE typename SharedLoadStreamB::Fragment const& fragment_b(int step) const { + return transformed_b[step % 2]; + } + + /// Increment the stage. + CUTLASS_DEVICE void inc_stage() { + stream_a.inc_stage(); + stream_b.inc_stage(); + } + + /// The stream for A. + SharedLoadStreamA stream_a; + /// The fragments to fetch A. + typename SharedLoadStreamA::FetchedFragment fetched_a[2]; + /// The fragments to transform A. + typename SharedLoadStreamA::TransformedFragment transformed_a[2]; + /// The stream for B. + SharedLoadStreamB stream_b; + /// The fragments to fetch B. 
+ typename SharedLoadStreamB::FetchedFragment fetched_b[2]; + /// The fragments to transform B. + typename SharedLoadStreamB::TransformedFragment transformed_b[2]; + }; + + /// The memory fence for shared loads. + static CUTLASS_DEVICE void shared_load_fence(bool in_loop) { + if (SharedLoadStreamA::Iterator::kRequiresLoadFence || + SharedLoadStreamB::Iterator::kRequiresLoadFence) { + __syncthreads(); + } + } + + /// The memory fence for shared stores. + static CUTLASS_DEVICE void shared_store_fence(bool in_loop) { __syncthreads(); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct SimplifiedGemmTraitsHelper { + /// The global iterator to load A from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorA; + /// The data converter for A before storing to shared memory. + typedef Copy GlobalTransformerA; + /// The iterator to store A to shared memory. + typedef TileStoreIterator + SharedStoreIteratorA; + /// The stream to load A from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamA; + + /// The global iterator to load B from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorB; + /// The data converter for B before storing to shared memory. + typedef Copy GlobalTransformerB; + /// The iterator to store B to shared memory. + typedef TileStoreIterator + SharedStoreIteratorB; + /// The stream to load B from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamB; + + /// The iterator to load A from shared memory. + typedef TileLoadIterator + SharedLoadIteratorA; + /// The stream to load A from shared memory. + typedef SharedLoadStream SharedLoadStreamA; + /// The iterator to load B from shared memory. + typedef TileLoadIterator + SharedLoadIteratorB; + /// The stream to load B from shared memory. 
+ typedef SharedLoadStream SharedLoadStreamB; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The config for the GEMM. + typename GemmConfig_, + /// The epilogue. + typename Epilogue_, + /// The index. + typename Index_ = int, + // The configuration for the A matrix. + typename GemmTileTraitsHelperA_ = GemmTileTraitsHelperA, + // The configuration for the B matrix. + typename GemmTileTraitsHelperB_ = GemmTileTraitsHelperB, + // The helper class to create the streams and iterators. + typename Helper_ = + SimplifiedGemmTraitsHelper > +struct SimplifiedGemmTraits : public GemmTraits< + // The config. + GemmConfig_, + // The stream to load A from global memory to shared memory. + typename Helper_::GlobalLoadStreamA, + // The stream to load B from global memory to shared memory. + typename Helper_::GlobalLoadStreamB, + // The stream to load A from shared memory. + typename Helper_::SharedLoadStreamA, + // The stream to load B from shared memory. + typename Helper_::SharedLoadStreamB, + // The epilogue. + Epilogue_, + // The block swizzle to reorganize the grid. + IdentityBlockSwizzle, + // The index. + Index_, + // The tool used to clear accumulators. + ClearAccumulators > { +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/grid_raster.h b/cutlass/gemm/grid_raster.h deleted file mode 100644 index 1f9e585e..00000000 --- a/cutlass/gemm/grid_raster.h +++ /dev/null @@ -1,436 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Abstraction for enumerating \p block_task within an input matrix - */ - -#include - -#include "../util/util.h" - - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * grid_raster_strategy - ******************************************************************************/ - -/** - * \brief Strategies for enumerating \p block_task within an input matrix - */ -struct grid_raster_strategy -{ - /// \brief Enumerants - enum kind_t - { - /** - * Default \p block_task assignment (currently ColumnMajor for N*, - * RowMajor for TT, and TiledCohort for TN) - */ - Default, - - /** - * Column-major \p block_task assignment - */ - ColumnMajor, - - /** - * Row-major \p block_task assignment - */ - RowMajor, - - /** - * Two-level \p block_task assignment (both column-major) - */ - TiledCohort, - }; -}; - - - -/****************************************************************************** - * grid_raster - ******************************************************************************/ - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * - * NB: This generic class is not directly constructible. Algorithm-specific - * template specializations will provide the API functionality prescribed here. 
- */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - grid_raster_strategy::kind_t RasterStrategy> ///< Strategy for enumerating \p block_task within an input matrix -struct grid_raster -{ - //------------------------------------------------------------------------- - // Device API - //------------------------------------------------------------------------- - - /// Thread block's base item coordinates (x, y) in matrix C - int2 block_item_coords; - - /// Constructor - grid_raster(); - - /// Whether the thread block base coordinates are out-of-bounds for an m*n matrix C - bool is_block_oob(int m, int n); - - - //------------------------------------------------------------------------- - // Grid launch API - //------------------------------------------------------------------------- - - /// Compute the kernel grid extents (in thread blocks) for consuming an m*n matrix C - static dim3 grid_dims(int m, int n); -}; - - - -/****************************************************************************** - * grid_raster (ColumnMajor specialization) - ******************************************************************************/ - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (ColumnMajor specialization) - * - * Maps thread blocksin column-major fashion - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - matrix_transform_t::kind_t TransformB> ///< View transform enumerant for matrix B -struct grid_raster< - BlockItemsY, - BlockItemsX, - TransformA, - TransformB, - 
grid_raster_strategy::ColumnMajor> ///< Strategy for enumerating \p block_task within an input matrix -{ - //------------------------------------------------------------------------- - // Device API - //------------------------------------------------------------------------- - - /// Thread block's base item coordinates (x, y) in matrix C - int2 block_item_coords; - - /// Constructor - inline __device__ - grid_raster() - { - // blockDim.x is the fastest changing grid dim on current architectures - block_item_coords = make_int2( - BlockItemsX * blockIdx.y, - BlockItemsY * blockIdx.x); - } - - /// Whether the base \p block_item_coords are out-of-bounds for an m*n matrix C - inline __device__ - bool is_block_oob(int m, int n) - { - // ColumnMajor never rasterizes fully out-of-bounds thread blocks - return false; - } - - //------------------------------------------------------------------------- - // Grid launch API - //------------------------------------------------------------------------- - - /// Compute the kernel grid extents (in thread blocks) for consuming an m*n matrix C - inline __host__ __device__ - static dim3 grid_dims(int m, int n) - { - // blockDim.x is the fastest changing grid dim on current architectures - return dim3( - (m + BlockItemsY - 1) / BlockItemsY, - (n + BlockItemsX - 1) / BlockItemsX); - } -}; - - - -/****************************************************************************** - * grid_raster (RowMajor specialization) - ******************************************************************************/ - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (RowMajor specialization) - * - * Enumerates \p block_task in row-major fashion - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - matrix_transform_t::kind_t 
TransformB> ///< View transform enumerant for matrix B -struct grid_raster< - BlockItemsY, - BlockItemsX, - TransformA, - TransformB, - grid_raster_strategy::RowMajor> ///< Strategy for enumerating \p block_task within an input matrix -{ - //------------------------------------------------------------------------- - // Device API - //------------------------------------------------------------------------- - - /// Thread block's base item coordinates (x, y) in matrix C - int2 block_item_coords; - - /// Constructor - inline __device__ - grid_raster() - { - // blockDim.x is the fastest changing grid dim on current architectures - block_item_coords = make_int2( - BlockItemsX * blockIdx.x, - BlockItemsY * blockIdx.y); - } - - /// Whether the base \p block_item_coords are out-of-bounds for an m*n matrix C - inline __device__ - bool is_block_oob(int m, int n) - { - // RowMajor never rasterizes fully out-of-bounds thread blocks - return false; - } - - //------------------------------------------------------------------------- - // Grid launch API - //------------------------------------------------------------------------- - - /// Compute the kernel grid extents (in thread blocks) for consuming an m*n matrix C - inline __host__ __device__ - static dim3 grid_dims(int m, int n) - { - // blockDim.x is the fastest changing grid dim on current architectures - return dim3( - (n + BlockItemsX - 1) / BlockItemsX, - (m + BlockItemsY - 1) / BlockItemsY); - } - -}; - - - -/****************************************************************************** - * grid_raster (TiledCohort specialization) - ******************************************************************************/ - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (TiledCohort specialization) - * - * Enumerates \p block_task in column-major fashion across "cohort" tiles (where - * cohorts are CohortBlocksY high and CohortBlocksX wide), and enumerates cohorts - * across the matrix in 
column-major fashion. - * - * Grid layout: - * - gridDim.y is the height of the grid in cohorts - * - gridDim.x is the width of the grid in cohorts multiplied by the number of - * thread blocks per cohort - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - matrix_transform_t::kind_t TransformB> ///< View transform enumerant for matrix B -struct grid_raster< - BlockItemsY, - BlockItemsX, - TransformA, - TransformB, - grid_raster_strategy::TiledCohort> ///< Strategy for enumerating \p block_task within an input matrix -{ - enum - { - /// Height in thread blocks of a grid rasterization cohort - CohortBlocksY = 2, - - /// Width in thread blocks of a grid rasterization cohort - CohortBlocksX = 2, - - /// Number of thread blocks per cohort - BlocksPerCohort = CohortBlocksY * CohortBlocksX, - - /// Height in items of a grid rasterization cohort - CohortItemsY = CohortBlocksY * BlockItemsY, - - /// Width in items of a grid rasterization cohort - CohortItemsX = CohortBlocksX * BlockItemsX, - - }; - - //------------------------------------------------------------------------- - // Device API - //------------------------------------------------------------------------- - - /// Thread block's base item coordinates (x, y) in matrix C - int2 block_item_coords; - - /// Constructor - inline __device__ - grid_raster() - { - int block_idx_cohort = blockIdx.x % BlocksPerCohort; - int2 cohort_coords_grid = make_int2( - blockIdx.x / BlocksPerCohort, - blockIdx.y); - - // Cohort is rastered in column-major order - int2 block_coords_cohort = make_int2( - block_idx_cohort / CohortBlocksY, - block_idx_cohort % CohortBlocksY); - - block_item_coords = make_int2( - ((cohort_coords_grid.x * CohortBlocksX) + block_coords_cohort.x) * BlockItemsX, - ((cohort_coords_grid.y * CohortBlocksY) + 
block_coords_cohort.y) * BlockItemsY); - } - - /// Whether the base \p block_item_coords are out-of-bounds for an m*n matrix C - inline __device__ - bool is_block_oob(int m, int n) - { - /// thread blocks within the cohort may be fully out-of-bounds - return (block_item_coords.x >= n) || (block_item_coords.y >= m); - } - - //------------------------------------------------------------------------- - // Grid launch API - //------------------------------------------------------------------------- - - /// Compute the kernel grid extents (in thread blocks) for consuming an m*n matrix C - inline __host__ __device__ - static dim3 grid_dims(int m, int n) - { - // Extents of C matrix in cohorts - int2 grid_cohort_dims = make_int2( - (n + CohortItemsX - 1) / CohortItemsX, - (m + CohortItemsY - 1) / CohortItemsY); - - return dim3( - grid_cohort_dims.x * BlocksPerCohort, // gridDim.x is width of grid in cohorts * size of cohort in blocks - grid_cohort_dims.y, // gridDim.y is height of grid in cohorts - 1); // gridDim.z is reserved for optional k-splitting - } -}; - - -/****************************************************************************** - * grid_raster (Default specializations) - ******************************************************************************/ - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (Default N* specialization) - * - * Maps thread blocksin column-major fashion - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - matrix_transform_t::kind_t TransformB> ///< View transform enumerant for matrix B -struct grid_raster< - BlockItemsY, - BlockItemsX, - matrix_transform_t::NonTranspose, ///< View transform enumerant for matrix A - TransformB, - grid_raster_strategy::Default> ///< Strategy for enumerating \p block_task within an input matrix -: - grid_raster< - BlockItemsY, - BlockItemsX, - 
matrix_transform_t::NonTranspose, - TransformB, - grid_raster_strategy::ColumnMajor> -{}; - - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (Default TT specialization) - * - * Maps thread blocksin row-major fashion - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX> ///< Width in columns of a block-wide tile in matrix C -struct grid_raster< - BlockItemsY, - BlockItemsX, - matrix_transform_t::Transpose, ///< View transform enumerant for matrix A - matrix_transform_t::Transpose, ///< View transform enumerant for matrix B - grid_raster_strategy::Default> ///< Strategy for enumerating \p block_task within an input matrix -: - grid_raster< - BlockItemsY, - BlockItemsX, - matrix_transform_t::Transpose, - matrix_transform_t::Transpose, - grid_raster_strategy::RowMajor> -{}; - - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (Default TN specialization) - * - * Maps thread blocksin blocked cohorts - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX> ///< Width in columns of a block-wide tile in matrix C -struct grid_raster< - BlockItemsY, - BlockItemsX, - matrix_transform_t::Transpose, ///< View transform enumerant for matrix A - matrix_transform_t::NonTranspose, ///< View transform enumerant for matrix B - grid_raster_strategy::Default> ///< Strategy for enumerating \p block_task within an input matrix -: - grid_raster< - BlockItemsY, - BlockItemsX, - matrix_transform_t::Transpose, - matrix_transform_t::NonTranspose, - grid_raster_strategy::TiledCohort> -{}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/hgemm_global_tile.h b/cutlass/gemm/hgemm_global_tile.h new file mode 100644 index 00000000..f14dbb31 --- /dev/null +++ b/cutlass/gemm/hgemm_global_tile.h @@ -0,0 +1,90 @@ 
+/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Tile traits used to construct global tile iterator for HGEMM. This is intended to + partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate + memory accesses larger than 16 bits. 
+*/ +#pragma once + +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmCrosswiseGlobalTileTraits : public GemmGlobalTileTraits< + // Which GEMM operand? + kOperand_, + // The layout. + kLayout_, + // The scalar. + Scalar_, + // The tile. + Tile_, + // The threads. + Threads_, + // The number of scalars per LDG/STG. + kAccessSize_> { + /// The base class. + typedef GemmGlobalTileTraits Base; + /// The threads. + typedef typename Base::Threads Threads; + /// The threads strides. + typedef Shape<1, 2, Base::Tile::kC> ThreadsDelta; + /// The strides in each dimension between different loads/stores. + typedef Shape Delta; + /// The number of iterations needed to load/store the tile. + typedef Shape + Iterations; + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH; + int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW; + + return make_Coord(0, thread_offset_h, thread_offset_w, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/hgemm_multiply_add.h b/cutlass/gemm/hgemm_multiply_add.h new file mode 100644 index 00000000..ebbdd06e --- /dev/null +++ b/cutlass/gemm/hgemm_multiply_add.h @@ -0,0 +1,104 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Specialization implementing multiply-add operation on half-precision floating point + fragments. 
+*/ +#pragma once + +#include + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Template performing matrix multiply-add operation within a thread +template +struct ThreadMultiplyAdd { + /// The shape of the instruction. + typedef Shape<1, 1, 2, 1> InstructionShape; + /// The number of accumulators per thread. + typedef AccumulatorsPerThread_ AccumulatorsPerThread; + /// The number of threads per warp. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of accumulators per warp. + typedef typename ShapeMul::Shape AccumulatorsPerWarp; + /// The type for A. + typedef half ScalarA; + /// The fragment for A. + typedef Fragment FragmentA; + /// The type for B. + typedef half ScalarB; + /// The fragment for B. + typedef Fragment FragmentB; + /// The type for C and D. + typedef half ScalarC; + /// The accumulators. + typedef Fragment Accumulators; + + /// Make sure there's an even number of elements in both dimensions. + static_assert(AccumulatorsPerThread::kH % 2 == 0, "Invalid size"); + static_assert(AccumulatorsPerThread::kW % 2 == 0, "Invalid size"); + + /// Ctor. + CUTLASS_DEVICE ThreadMultiplyAdd() {} + + /// Multiply : d = a*b + c. + CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { +#if defined(__CUDACC__) && __CUDA_ARCH__ >= 530 + // The inputs. + __half2 const* a_half2 = reinterpret_cast<__half2 const*>(&a[0]); + __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]); + __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]); + + // The output. + __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]); + + for (int j = 0; j < AccumulatorsPerThread::kH / 2; ++j) { + for (int i = 0; i < AccumulatorsPerThread::kW / 2; ++i) { + // The offsets in the output fragment. 
+ int const k0 = (2 * j + 0) * (AccumulatorsPerThread::kW / 2) + i; + int const k1 = (2 * j + 1) * (AccumulatorsPerThread::kW / 2) + i; + + // Compute the product a[i] * b[j].H0_H0. + d_half2[k0] = __hfma2(a_half2[i], __low2half2(b_half2[j]), c_half2[k0]); + // Compute the product a[i] * b[j].H1_H1. + d_half2[k1] = __hfma2(a_half2[i], __high2half2(b_half2[j]), c_half2[k1]); + } + } +#endif + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/hgemm_swizzle.h b/cutlass/gemm/hgemm_swizzle.h new file mode 100644 index 00000000..ebec0d46 --- /dev/null +++ b/cutlass/gemm/hgemm_swizzle.h @@ -0,0 +1,94 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in + shared memory for multiplicands. +*/ +#pragma once + +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmSwizzle { + /// The global iterator. + typedef GlobalIterator_ GlobalIterator; + /// The source fragment. + typedef typename GlobalIterator::Fragment Fragment; + /// The shape of the source fragment. + typedef typename GlobalIterator::FragmentShape FragmentShape; + + /// The input fragment. + typedef Fragment InputFragment; + /// The output fragment. + typedef Fragment OutputFragment; + + /// The src/dst must be half fragments. + static_assert((platform::is_same::value), "Works on half"); + + /// The number of elements must be a multiple of 2. + static_assert(FragmentShape::kH == 2 && ShapeCount::kWc == 2, "Not multiple of 2"); + + /// Ctor. + CUTLASS_DEVICE HgemmSwizzle() {} + + /// Transform a fragment. + CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) { + // Expose src/dst as int arrays. + int const* src_int = reinterpret_cast(&src[0]); + int* dst_int = reinterpret_cast(&dst[0]); + + // Transpose the data. 
+ for (int d = 0; d < FragmentShape::kD; ++d) { + // The indices to read two consecutive "rows". + int const i0 = 2 * d + 0; + int const i1 = 2 * d + 1; + + int a0 = src_int[i0]; + int a1 = src_int[i1]; + + int b0, b1; + asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(a0), "r"(a1)); + asm volatile("prmt.b32 %0, %1, %2, 0x7632;" : "=r"(b1) : "r"(a0), "r"(a1)); + + // The indices to store with "strides". + int const j0 = 0 * (ShapeCount::kDhw / 2) + d; + int const j1 = 1 * (ShapeCount::kDhw / 2) + d; + + dst_int[j0] = b0; + dst_int[j1] = b1; + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/hgemm_traits.h b/cutlass/gemm/hgemm_traits.h new file mode 100644 index 00000000..78e5bac5 --- /dev/null +++ b/cutlass/gemm/hgemm_traits.h @@ -0,0 +1,391 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of half-precision GEMM computation. +*/ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The tile size for the GEMM KxNxM. + typename OutputTile_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_ = 2, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_ = 2> +struct HgemmConfig + : public GemmConfig< + /// The scalar type for A. + half, + /// The scalar type for B. + half, + /// The scalar type for C. + half, + /// The scalar type for D. + half, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, half, half, half>, + /// The number of scalars per LDG for A. + kScalarsPerLdgA_, + /// The number of scalars per STS for A.
+ kScalarsPerLdgA_, + /// The number of scalars per LDS for A. + 8, + /// The number of scalars per LDG for B. + kScalarsPerLdgB_, + /// The number of scalars per STS for B. + kScalarsPerLdgB_, + /// The number of scalars per LDS for B. + 8, + /// The number of scalars per LDG for C and STG for D. + 2, + /// The number of scalars per STS for D. + 8, + /// The number of scalars per LDS for D. + 2, + /// The number of stages in shared memory. + 2> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTransformerA {}; + +template +struct HgemmTransformerA { + typedef Convert Transformer; +}; + +template +struct HgemmTransformerA { + typedef HgemmSwizzle Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTransformerB {}; + +template +struct HgemmTransformerB { + typedef Convert Transformer; +}; + +template +struct HgemmTransformerB { + typedef HgemmSwizzle Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTileTraitsHelperA : public GemmTileTraitsHelperA {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTileTraitsHelperA + : public GemmTileTraitsHelperA { + /// The base config. + typedef GemmTileTraitsHelperA Base; + + /// The traits class to build the iterator to load data from global memory for A^T. + typedef HgemmCrosswiseGlobalTileTraits< + GemmOperand::kA, + // The layout. + MatrixLayout::kRowMajor, + // The pointer. + half const, + // The tile has size MxK in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>, + // The threads are distributed as (threads / K ) x K (the traits may reorganize). 
+ Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc) + GemmConfig_::kScalarsPerLdgA> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for A^T. + typedef GemmSharedStoreWithSkewTileAbTraits< + // The pointer. + half, + // The tile has size KxM in GEMM's terminology. + Shape, + // The threads are distributed as warps x 32(the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + 2, + // The skew to avoid bank conflicts added in the tile W dimension. + 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for A^T. + typedef GemmSharedLoadTileATraits< + // The pointer. + half const, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction. + typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + 8, + // The skew. + SharedStoreTileTraits::kSkew> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTileTraitsHelperB : public GemmTileTraitsHelperB {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTileTraitsHelperB + : public GemmTileTraitsHelperB { + /// The base config. + typedef GemmTileTraitsHelperB Base; + + /// The traits class to build the iterator to load data from global memory for B^N. + typedef HgemmCrosswiseGlobalTileTraits< + GemmOperand::kB, + // The layout. 
+ MatrixLayout::kColumnMajor, + // The pointer. + half const, + // The tile has size KxN in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc) + GemmConfig_::kScalarsPerLdgB> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for B^N. + typedef GemmSharedStoreWithSkewTileAbTraits< + // The pointer. + half, + // The tile has size KxN in GEMM's terminology. + Shape, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + 2, + // The skew to avoid bank conflicts added in the tile W dimension. + 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for B^N. + typedef GemmSharedLoadTileBTraits< + // The pointer. + half const, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction. + typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + 8, + // The skew. + SharedStoreTileTraits::kSkew> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_, + /// The functor to do the math in the epilogue. 
+ typename EpilogueFunctor_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<32, 8, 8>, + /// The number of halfs loaded in one LDG for A. + int kScalarsPerLdgA_ = 2, + /// The number of halfs loaded in one LDG for B. + int kScalarsPerLdgB_ = 2, + /// The index. + typename Index_ = int> +struct HgemmTraitsHelper { + /// The HGEMM config. + typedef HgemmConfig + GemmConfig; + /// The GEMM config for A. + typedef HgemmTileTraitsHelperA GemmTileTraitsHelperA; + /// The GEMM config for B. + typedef HgemmTileTraitsHelperB GemmTileTraitsHelperB; + + /// The iterator to load A from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorA; + /// The default transformer for A. + typedef typename HgemmTransformerA::Transformer GlobalTransformerA; + /// The iterator to store A to shared memory. + typedef TileStoreIterator + SharedStoreIteratorA; + /// The stream to load A from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamA; + + /// The iterator to load B from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorB; + // The default transformer for B. + typedef typename HgemmTransformerB::Transformer GlobalTransformerB; + /// The iterator to store B to shared memory. + typedef TileStoreIterator + SharedStoreIteratorB; + /// The stream to load B from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamB; + + /// The iterator to load A from shared memory + typedef TileLoadIterator + SharedLoadIteratorA; + /// The stream to load A from shared memory. + typedef SharedLoadStream SharedLoadStreamA; + /// The iterator to load B from shared memory. + typedef TileLoadIterator + SharedLoadIteratorB; + /// The stream to load B from shared memory. + typedef SharedLoadStream SharedLoadStreamB; + + /// The functor to do the multiply-add in the main loop. + typedef typename GemmConfig::MultiplyAdd MultiplyAdd; + /// The object to clear accumulators. 
+ typedef ClearAccumulators ClearAccumulators; + + /// The traits class for the epilogue. + typedef SimplifiedGemmEpilogueTraits GemmEpilogueTraits; + /// The epilogue. + typedef GemmEpilogue Epilogue; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_ = Shape<8, 128, 128>, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_ = LinearScaling, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<8, 8, 16>, + /// The number of halfs loaded in one LDG for A. + int kScalarsPerLdgA_ = 2, + /// The number of halfs loaded in one LDG for B. + int kScalarsPerLdgB_ = 2, + /// The index. + typename Index_ = int, + /// The helper class. + typename Helper_ = HgemmTraitsHelper > +struct HgemmTraits : public GemmTraits< + // The config. + typename Helper_::GemmConfig, + // The stream to load A from global memory to shared memory. + typename Helper_::GlobalLoadStreamA, + // The stream to load B from global memory to shared memory. + typename Helper_::GlobalLoadStreamB, + // The stream to load A from shared memory. + typename Helper_::SharedLoadStreamA, + // The stream to load B from shared memory. + typename Helper_::SharedLoadStreamB, + // The epilogue. + typename Helper_::Epilogue, + // The block swizzle to reorganize the grid. + IdentityBlockSwizzle, + // The index. + Index_, + // The tool used to clear accumulators. 
+ typename Helper_::ClearAccumulators> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/identity_block_swizzle.h b/cutlass/gemm/identity_block_swizzle.h new file mode 100644 index 00000000..e1bdb2e0 --- /dev/null +++ b/cutlass/gemm/identity_block_swizzle.h @@ -0,0 +1,48 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines functors for mapping blockIdx to partitions of the GEMM computation. + + Currently, only an identity mapping is implemented (IdentityBlockSwizzle). +*/ +#pragma once + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +struct IdentityBlockSwizzle { + /// Ctor. Nothing to initialize: the struct declares no data members. + CUTLASS_DEVICE IdentityBlockSwizzle() {} + + /// Swizzle the block index. The identity mapping returns blockIdx unchanged. + CUTLASS_DEVICE dim3 swizzle() { return blockIdx; } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/igemm_epilogue.h b/cutlass/gemm/igemm_epilogue.h new file mode 100644 index 00000000..0d699803 --- /dev/null +++ b/cutlass/gemm/igemm_epilogue.h @@ -0,0 +1,320 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and + floating-point output matrix formats. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmFloatToInt8Converter { + /// The input fragment. + typedef Fragment InputFragment; + /// The output fragment. + typedef Fragment OutputFragment; + + // We are packing 4 floats into int32 registers so we need kElements to be multiple of 4. 
+ static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4"); + + /// Ctor. + CUTLASS_DEVICE IgemmFloatToInt8Converter() {} + + /// Transform a fragment. + CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) { + transform(src, 0, dst); + } + + /// Transform a fragment. + template + CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) { + // The inputs. + float4 const* src_f4 = reinterpret_cast(&src[0]); + // The outputs. + int* dst_int = reinterpret_cast(&dst[0]); + + // Iterate over the floats and pack them together to produce ints. + for (int i = 0; i < kElements_ / 4; ++i) { + // Read the float4. + float4 f4 = src_f4[i]; + + // Clamp the 4 elements of the floats to the [-128, +127] range. + float x = fmaxf(-128.f, fminf(127.f, f4.x)); + float y = fmaxf(-128.f, fminf(127.f, f4.y)); + float z = fmaxf(-128.f, fminf(127.f, f4.z)); + float w = fmaxf(-128.f, fminf(127.f, f4.w)); + + // Convert to integers. + int ix = (int)x; + int iy = (int)y; + int iz = (int)z; + int iw = (int)w; + + // Extract the lower bytes to build an int32 with 4 int8. + asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(ix) : "r"(iy)); + asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(iz) : "r"(iw)); + asm volatile("prmt.b32 %0, %0, %1, 0x5410;" : "+r"(ix) : "r"(iz)); + + // Store the int. + dst_int[i] = ix; + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmGlobalStoreTransformer { + typedef Convert, OutputFragment_> Transformer; +}; + +template +struct IgemmGlobalStoreTransformer > { + typedef IgemmFloatToInt8Converter Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmInt8ToFloatConverter { + /// The input fragment. + typedef Fragment InputFragment; + /// The output fragment. 
+ typedef Fragment OutputFragment; + + // We are unpacking 4 int8s from int32. + static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4"); + + /// Ctor. + CUTLASS_DEVICE IgemmInt8ToFloatConverter() {} + + /// Transform a fragment. + CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) { + transform(src, 0, dst); + } + + /// Transform a fragment. + template + CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) { + // The inputs. + int const* src_int = reinterpret_cast(&src[0]); + // The outputs. + float4* dst_f4 = reinterpret_cast(&dst[0]); + + // Iterate over the int8 and unpack them together to produce floats. + for (int i = 0; i < kElements_ / 4; ++i) { + // Read the int. + int ix, iy, iz, iw = src_int[i]; + + // Extract the 4 bytes. + asm volatile("prmt.b32 %0, 0x0, %1, 0x4440;" : "=r"(ix) : "r"(iw)); + asm volatile("prmt.b32 %0, 0x0, %1, 0x4441;" : "=r"(iy) : "r"(iw)); + asm volatile("prmt.b32 %0, 0x0, %1, 0x4442;" : "=r"(iz) : "r"(iw)); + asm volatile("prmt.b32 %0, 0x0, %1, 0x4443;" : "=r"(iw) : "r"(iw)); + + // The floats. + float fx, fy, fz, fw; + + // Convert to floats (make sure we generate I2F.F32.S8). + asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fx) : "r"(ix)); + asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fy) : "r"(iy)); + asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fz) : "r"(iz)); + asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fw) : "r"(iw)); + + // Store the float4. 
+ dst_f4[i] = make_float4(fx, fy, fz, fw); + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmGlobalLoadTransformer { + typedef Convert > Transformer; +}; + +template +struct IgemmGlobalLoadTransformer, float> { + typedef IgemmInt8ToFloatConverter Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmSharedStoreTransformer { + typedef Convert, OutputFragment_> Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmEpilogueTraitsHelper + : public GemmEpilogueTraitsHelper { + /// The base class. + typedef GemmEpilogueTraitsHelper Base; + /// The config. + typedef IgemmConfig_ IgemmConfig; + + /// The scalar type of the epilogue. + typedef typename Base::Scalar Scalar; + /// The iterations. + typedef typename Base::Iterations Iterations; + /// The iterations strides. + typedef typename Base::Delta Delta; + + /// The traits class for the iterator. + typedef typename Base::GlobalLoadTileTraits GlobalLoadTileTraits; + /// The iterator to store to shared memory. + typedef GemmGlobalIteratorCd GlobalLoadIteratorC; + /// The fragment that needs to be produced by the load iterator. + typedef typename GlobalLoadIteratorC::Fragment GlobalFragmentC; + /// The transformer from loaded data to math fragment. + typedef + typename IgemmGlobalLoadTransformer::Transformer GlobalTransformerC; + + /// The traits class for the iterator. + typedef typename Base::GlobalStoreTileTraits GlobalStoreTileTraits; + /// The iterator to store to shared memory. + typedef GemmGlobalIteratorCd GlobalStoreIteratorD; + /// The fragment that needs to be passed to that store iterator. + typedef typename GlobalStoreIteratorD::Fragment GlobalFragmentD; + /// The transformer from accumulators to shared memory fragments. 
+ typedef + typename IgemmGlobalStoreTransformer::Transformer GlobalTransformerD; + + /// The traits class for the shared iterator to store D to shared memory. + typedef typename Base::SharedStoreTileTraits SharedStoreTileTraits; + /// The shared iterator to store D to shared memory. + typedef TileStoreIterator + SharedStoreIteratorD; + /// The fragment that needs to be passed to that store iterator. + typedef typename SharedStoreIteratorD::Fragment SharedStoreFragmentD; + /// The transformer from accumulators to shared memory fragments. + typedef typename IgemmSharedStoreTransformer::Transformer + SharedStoreTransformerD; + /// The traits class for the shared iterator to load D from shared memory. + typedef typename Base::SharedLoadTileTraits SharedLoadTileTraits; + /// The shared iterator to load D from shared memory. + typedef TileLoadIterator + SharedLoadIteratorD; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The config. + typename IgemmConfig_, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_, + /// The index. + typename Index_ = int, + /// The helper class to assemble the traits. + typename Helper_ = IgemmEpilogueTraitsHelper > +struct IgemmEpilogueTraits : public GemmEpilogueTraits< + // The output tile. + typename IgemmConfig_::OutputTile, + // The accumulators. + typename IgemmConfig_::Accumulators, + // The global iterator for C. + typename Helper_::GlobalLoadIteratorC, + // The transformer for C. + typename Helper_::GlobalTransformerC, + // The transformer for D. + typename Helper_::GlobalTransformerD, + // The global iterator for D. + typename Helper_::GlobalStoreIteratorD, + // The iterator to store D to shared memory. + typename Helper_::SharedStoreIteratorD, + // The shared store transformer for D. + typename Helper_::SharedStoreTransformerD, + // The iterator to load D from shared memory. 
+ typename Helper_::SharedLoadIteratorD, + // The iterations. + typename Helper_::Iterations, + // The strides between iterations. + typename Helper_::Delta, + // The functor to be used in the epilogue. + EpilogueFunctor_, + // The index. + Index_> { + /// Do we output in int8? + static bool const kInt8Output = + platform::is_same::value != 0; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmEpilogue : public GemmEpilogue { + /// The base class. + typedef GemmEpilogue Base; + + /// Ctor. + CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_, + typename Base::SharedStorage& shared_storage_, + typename Base::Index m_, + typename Base::Index n_) + : Base(params_, shared_storage_, m_, n_) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmEpilogue : public GemmEpilogue { + /// The base class. + typedef GemmEpilogue Base; + + /// Ctor. + CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_, + typename Base::SharedStorage& shared_storage_, + typename Base::Index m_, + typename Base::Index n_) + : Base(params_, shared_storage_, m_, n_) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/igemm_global_tile.h b/cutlass/gemm/igemm_global_tile.h new file mode 100644 index 00000000..6993c631 --- /dev/null +++ b/cutlass/gemm/igemm_global_tile.h @@ -0,0 +1,95 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements tile iterators to partition the thread block tile into 2D subtiles and + efficiently load each. Applies permute transformation to construct 'interleaved K-strided' + data layout in which 4-element dot products from the same K index are arranged in consecutive + locations within shared memory. + + Supports efficient loads from shared memory to target the DP4A instruction. 
+*/ +#pragma once + +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmContiguousGlobalTileTraits : public GemmGlobalTileTraits< + // Which GEMM operand? + kOperand_, + // The layout. + kLayout_, + // The scalar. + Scalar_, + // The tile. + Tile_, + // The threads. + Threads_, + // The number of scalars per LDG/STG. + kAccessSize_> { + /// The base class. + typedef GemmGlobalTileTraits Base; + /// The threads. + typedef typename Base::Threads Threads; + /// The strides in each dimension between different loads/stores. + typedef Shape Delta; + /// The number of iterations needed to load/store the tile. + typedef Shape + Iterations; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH; + int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW; + + return make_Coord(0, thread_offset_h, thread_offset_w, 0); + } + }; + + public: + /// The threads strides. + typedef Shape<1, 4, Base::Tile::kC> ThreadsDelta; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/igemm_multiply_add.h b/cutlass/gemm/igemm_multiply_add.h new file mode 100644 index 00000000..5a8baec5 --- /dev/null +++ b/cutlass/gemm/igemm_multiply_add.h @@ -0,0 +1,89 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements matrix multiply accumulate operation of 8-bit integer data using DP4A + instruction. 
+*/ +#pragma once + +#include + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Template performing matrix multiply-add of packed 8-bit integer data within a thread (DP4A) +template +struct ThreadMultiplyAdd { + /// The shape of the instruction. + typedef Shape<4, 1, 1> InstructionShape; + /// The number of accumulators per thread. + typedef AccumulatorsPerThread_ AccumulatorsPerThread; + /// The number of threads per warp. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of accumulators per warp. + typedef typename ShapeMul::Shape AccumulatorsPerWarp; + /// The type for A. + typedef int8_t ScalarA; + /// The fragment for A. + typedef Fragment FragmentA; + /// The type for B. + typedef int8_t ScalarB; + /// The fragment for B. + typedef Fragment FragmentB; + /// The type for C and D. + typedef int ScalarC; + /// The accumulators. + typedef Fragment Accumulators; + + /// Ctor. + CUTLASS_DEVICE ThreadMultiplyAdd() {} + + /// Multiply-add : d = a*b + c, issuing one dp4a per accumulator. + CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { + // The inputs, viewed as 32-bit words so each dp4a operand carries four int8 values.
+ int const* a_int = reinterpret_cast(&a[0]); + int const* b_int = reinterpret_cast(&b[0]); + + for (int j = 0; j < AccumulatorsPerThread::kH; ++j) { + for (int i = 0; i < AccumulatorsPerThread::kW; ++i) { + asm volatile("dp4a.s32.s32 %0, %1, %2, %3;" + : "=r"(d[j * AccumulatorsPerThread::kW + i]) + : "r"(a_int[i]), "r"(b_int[j]), "r"(c[j * AccumulatorsPerThread::kW + i])); + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/igemm_swizzle.h b/cutlass/gemm/igemm_swizzle.h new file mode 100644 index 00000000..77cf7118 --- /dev/null +++ b/cutlass/gemm/igemm_swizzle.h @@ -0,0 +1,115 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Transposes a fragment of data containing packed 8-bit integer elements. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmSwizzle { + /// The global iterator. + typedef GlobalIterator_ GlobalIterator; + /// The source fragment. + typedef typename GlobalIterator::Fragment Fragment; + /// The shape of the source fragment. + typedef typename GlobalIterator::FragmentShape FragmentShape; + + /// The source fragment. + typedef Fragment InputFragment; + /// The destination fragment. + typedef Fragment OutputFragment; + + /// The src/dst must be int8 fragments. + static_assert((platform::is_same::value), "Works on int8"); + + /// Both FragmentShape::kH and ShapeCount::kWc must be multiples of 4. + static_assert(FragmentShape::kH % 4 == 0 && ShapeCount::kWc % 4 == 0, + "Not multiple of 4"); + + /// Ctor. + CUTLASS_DEVICE IgemmSwizzle() {} + + /// Transform a fragment: transposes each 4x4 block of bytes of src into dst. + CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) { + // Expose src/dst as int arrays. + int const* src_int = reinterpret_cast(&src[0]); + int* dst_int = reinterpret_cast(&dst[0]); + + // Transpose the data: byte k of the 4 words in rows 4h..4h+3 is gathered into output word k.
+ for (int d = 0; d < FragmentShape::kD; ++d) { + for (int h = 0; h < FragmentShape::kH / 4; ++h) { + for (int w = 0; w < ShapeCount::kWc / 4; ++w) { + int const i0 = d * (ShapeCount::kHwc / 4) + + (4 * h + 0) * (ShapeCount::kWc / 4) + w; + int const i1 = d * (ShapeCount::kHwc / 4) + + (4 * h + 1) * (ShapeCount::kWc / 4) + w; + int const i2 = d * (ShapeCount::kHwc / 4) + + (4 * h + 2) * (ShapeCount::kWc / 4) + w; + int const i3 = d * (ShapeCount::kHwc / 4) + + (4 * h + 3) * (ShapeCount::kWc / 4) + w; + + int a0 = src_int[i0]; + int a1 = src_int[i1]; + int a2 = src_int[i2]; + int a3 = src_int[i3]; + + int b0, b1, b2, b3, c0; + asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(b0) : "r"(a0), "r"(a1)); + asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(c0) : "r"(a2), "r"(a3)); + asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(b0), "r"(c0)); + + asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(b1) : "r"(a0), "r"(a1)); + asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(c0) : "r"(a2), "r"(a3)); + asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b1) : "r"(b1), "r"(c0)); + + asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(b2) : "r"(a0), "r"(a1)); + asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(c0) : "r"(a2), "r"(a3)); + asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b2) : "r"(b2), "r"(c0)); + + asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(b3) : "r"(a0), "r"(a1)); + asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(c0) : "r"(a2), "r"(a3)); + asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b3) : "r"(b3), "r"(c0)); + + dst_int[i0] = b0; + dst_int[i1] = b1; + dst_int[i2] = b2; + dst_int[i3] = b3; + } + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/igemm_traits.h b/cutlass/gemm/igemm_traits.h new file mode 100644 index 00000000..9e8b9365 --- /dev/null +++ b/cutlass/gemm/igemm_traits.h @@ -0,0 
+1,393 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of mixed-precision integer GEMM. Multiplicands are assumed + to be packed 8-bit integers, accumulators are assumed to be 32-bit signed integers, and output + formats vary. 
+*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The tile size for the GEMM KxNxM. + typename OutputTile_, + /// The output type. + typename ScalarD_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_> +struct IgemmConfig + : public GemmConfig< + /// The scalar type for A. + int8_t, + /// The scalar type for B. + int8_t, + /// The scalar type for C. + ScalarD_, + /// The scalar type for D. + ScalarD_, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, int8_t, int8_t, int>, + /// The number of scalars per LDG for A. + 4, + /// The number of scalars per STS for A. + 4, + /// The number of scalars per LDS for A. + 16, + /// The number of scalars per LDG for B. + 4, + /// The number of scalars per STS for B. + 4, + /// The number of scalars per LDS for B. + 16, + /// The number of scalars per LDG for C and STG for D. + 1, + /// The number of scalars per STS for D. + 4, + /// The number of scalars per LDS for D. + 1, + /// The number of stages in shared memory. + 2> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmConfig + : public GemmConfig< + /// The scalar type for A. + int8_t, + /// The scalar type for B. + int8_t, + /// The scalar type for C. + int8_t, + /// The scalar type for D. + int8_t, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, int8_t, int8_t, int>, + /// The number of scalars per LDG for A. + 4, + /// The number of scalars per STS for A. + 4, + /// The number of scalars per LDS for A. + 16, + /// The number of scalars per LDG for B. 
+ 4, + /// The number of scalars per STS for B. + 4, + /// The number of scalars per LDS for B. + 16, + /// The number of scalars per LDG for C and STG for D. + 4, + /// The number of scalars per STS for D. + 4, + /// The number of scalars per LDS for D. + 4, + /// The number of stages in shared memory. + 2> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTileTraitsHelperA : public GemmTileTraitsHelperA {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTileTraitsHelperA + : public GemmTileTraitsHelperA { + /// The base config. + typedef GemmTileTraitsHelperA Base; + + /// The number of scalars per STS for A. + static int const kScalarsPerStsA = 16; + + /// The traits class to build the iterator to load data from global memory for A^N. + typedef IgemmContiguousGlobalTileTraits< + GemmOperand::kA, + // The layout. + MatrixLayout::kColumnMajor, + // The pointer is int8_t const. + int8_t const, + // The tile has size KxM in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + 4> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for A^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer is int8_t. + int8_t, + // The tile has size KxM in GEMM's terminology. + Shape, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). 
+ kScalarsPerStsA> + SharedStoreTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTileTraitsHelperB : public GemmTileTraitsHelperB {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTileTraitsHelperB + : public GemmTileTraitsHelperB { + /// The base config. + typedef GemmTileTraitsHelperB Base; + + /// The number of scalars per STS for B. + static int const kScalarsPerStsB = 16; + + /// The traits class to build the iterator to load data from global memory for B^T. + typedef IgemmContiguousGlobalTileTraits< + GemmOperand::kB, + // The layout. + MatrixLayout::kRowMajor, + // The pointer is int8_t const. + int8_t const, + // The tile has size KxN in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + 4> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for B^T. + typedef GemmSharedStoreTileAbTraits< + // The pointer is int8_t. + int8_t, + // The tile has size KxN in GEMM's terminology. + Shape, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). 
+ kScalarsPerStsB> + SharedStoreTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTransformerA {}; + +template +struct IgemmTransformerA { + typedef Copy Transformer; +}; + +template +struct IgemmTransformerA { + typedef IgemmSwizzle Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTransformerB {}; + +template +struct IgemmTransformerB { + typedef Copy Transformer; +}; + +template +struct IgemmTransformerB { + typedef IgemmSwizzle Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_, + /// The output type. + typename ScalarD_, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<32, 8, 8>, + /// The index. + typename Index_ = int> +struct IgemmTraitsHelper { + /// The IGEMM config. + typedef IgemmConfig GemmConfig; + /// The tile traits helper for A. + typedef IgemmTileTraitsHelperA GemmTileTraitsHelperA; + /// The tile traits helper for B. + typedef IgemmTileTraitsHelperB GemmTileTraitsHelperB; + + /// The iterator to load A from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorA; + /// The default transformer for A. + typedef typename IgemmTransformerA::Transformer GlobalTransformerA; + /// The iterator to store A to shared memory. + typedef TileStoreIterator + SharedStoreIteratorA; + /// The stream to load A from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamA; + + /// The iterator to load B from global memory. 
+ typedef GemmGlobalIteratorAb + GlobalLoadIteratorB; + /// The default transformer for B. + typedef typename IgemmTransformerB::Transformer GlobalTransformerB; + /// The iterator to store B to shared memory. + typedef TileStoreIterator + SharedStoreIteratorB; + /// The stream to load B from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamB; + + /// The iterator to load A from shared memory. + typedef TileLoadIterator + SharedLoadIteratorA; + /// The stream to load A from shared memory. + typedef SharedLoadStream > + SharedLoadStreamA; + /// The iterator to load B from shared memory. + typedef TileLoadIterator + SharedLoadIteratorB; + /// The stream to load B from shared memory. + typedef SharedLoadStream > + SharedLoadStreamB; + + /// The multiply-add functor. + typedef typename GemmConfig::MultiplyAdd MultiplyAdd; + /// The object to clear accumulators. + typedef ClearAccumulators ClearAccumulators; + + /// The epilogue. + typedef IgemmEpilogue > Epilogue; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmEpilogueScalar { + typedef float Scalar; +}; + +template <> +struct IgemmEpilogueScalar { + typedef int Scalar; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_ = Shape<32, 128, 128>, + /// The output type. + typename ScalarD_ = int, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_ = LinearScaling::Scalar>, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<32, 8, 8>, + /// The index. + typename Index_ = int, + /// The helper class. + typename Helper_ = IgemmTraitsHelper > +struct IgemmTraits : public GemmTraits< + // The config. 
+ typename Helper_::GemmConfig, + // The stream to load A from global memory to shared memory. + typename Helper_::GlobalLoadStreamA, + // The stream to load B from global memory to shared memory. + typename Helper_::GlobalLoadStreamB, + // The stream to load A from shared memory. + typename Helper_::SharedLoadStreamA, + // The stream to load B from shared memory. + typename Helper_::SharedLoadStreamB, + // The epilogue. + typename Helper_::Epilogue, + // The block swizzle to reorganize the grid. + IdentityBlockSwizzle, + // The index. + Index_, + // The tool used to clear accumulators. + typename Helper_::ClearAccumulators> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/k_split_control.h b/cutlass/gemm/k_split_control.h deleted file mode 100644 index 7a332b0b..00000000 --- a/cutlass/gemm/k_split_control.h +++ /dev/null @@ -1,310 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Abstraction for coordinating inter-block k-splitting - */ - -#include - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * Storage and initialization - ******************************************************************************/ - -enum -{ - NumFlagsSplitK = 4096 -}; - - -/** - * Global K-split semaphore flags - * - * TODO: use demand-allocated storage to provide copies for concurrent streams - */ -__device__ int d_flags_split_k[NumFlagsSplitK]; - - -/** - * Preparation kernel for zero-initializing semaphore flags - */ -__global__ void prepare_kernel(int *d_flags_split_k) -{ - int tid = (blockIdx.x * blockDim.x) + threadIdx.x; - if (tid < NumFlagsSplitK) - d_flags_split_k[tid] = 0; -} - - -/****************************************************************************** - * k_split_control - ******************************************************************************/ - -/** - * \brief Abstraction for coordinating inter-block k-splitting - */ -struct k_split_control -{ - 
/// Extent of a thread block's partition along the GEMM K-axis - int split_k; - - /// Whether or not to use a semaphore for inter-block k-splitting. - bool use_semaphore; - - /// Pointer to semaphore - int *d_flags; - - - - //------------------------------------------------------------------------- - // Device API - //------------------------------------------------------------------------- - - /** - * Return the thread block's starting coordinate (k) within the - * multiplicand matrices - */ - inline __device__ - int block_begin_item_k() - { - return blockIdx.z * split_k; - } - - - /** - * Return the thread block's ending coordinate (k) within the multiplicand - * matrices (one-past) - */ - inline __device__ - int block_end_item_k(int dim_k) - { - int next_start_k = block_begin_item_k() + split_k; - return __NV_STD_MIN(next_start_k, dim_k); - } - - - /** - * Whether the thread block is a secondary accumulator in an inter-block - * k-splitting scheme - */ - inline __device__ - bool is_secondary_accumulator() - { - return (blockIdx.z > 0); - } - - - /** - * Wait for predecessor thread block(s) to produce the exclusive - * partial-sums for this block-wide tile - */ - inline __device__ - void wait() - { - // Wait on semaphore - if ((use_semaphore) && (blockIdx.z > 0)) - { - if (threadIdx.x == 0) - { - int bid = (blockIdx.y * gridDim.x) + blockIdx.x; - int hash = bid % NumFlagsSplitK; - int found; - int looking = blockIdx.z; - while (true) - { - asm volatile ("ld.global.cg.u32 %0, [%1];\n" : "=r"(found) : "l"(d_flags + hash)); - - if (found == looking) - break; - - /// Fence to keep load from being hoisted from the loop - __syncwarp(0x00000001); - } - } - - __syncthreads(); - } - } - - - /** - * Signal the successor thread_block(s) that the inclusive partial-sums - * from this block-wide tile are available - */ - inline __device__ - void signal() - { - if (use_semaphore) - { - __syncthreads(); - - if (threadIdx.x == 0) - { - int bid = (blockIdx.y * gridDim.x) + 
blockIdx.x; - int hash = bid % NumFlagsSplitK; - int val = blockIdx.z + 1; - - asm volatile ("st.global.cg.u32 [%0], %1;\n" : : "l"(d_flags + hash), "r"(val)); - } - } - } - - - //------------------------------------------------------------------------- - // Grid launch API - //------------------------------------------------------------------------- - - /** - * Constructor - */ - inline - k_split_control( - int *d_flags, - int sm_count, - int max_sm_occupancy, - int dim_k, - int block_tile_items_k, - dim3 block_dims, - dim3 &grid_dims) ///< [in,out] - : - d_flags(d_flags), - split_k(dim_k) - { - // Compute wave efficiency - float wave_efficiency = get_wave_efficiency( - sm_count, - max_sm_occupancy, - block_dims, - grid_dims); - - // Update split-k if wave efficiency is less than some threshold - if (wave_efficiency < 0.9) - { - int num_threadblocks = grid_dims.x * grid_dims.y * grid_dims.z; - - // Ideal number of thread blocks in grid - int ideal_threadblocks = lcm(sm_count, num_threadblocks); - - // Desired number of partitions to split K-axis into - int num_partitions = ideal_threadblocks / num_threadblocks; - - // Compute new k-split share - int new_split_k = (dim_k + num_partitions - 1) / num_partitions; - - // Round split_k share to the nearest block_task_policy_t::BlockItemsK - new_split_k = round_nearest(new_split_k, block_tile_items_k); - - // Recompute k-splitting factor with new_split_k - num_partitions = (dim_k + new_split_k - 1) / new_split_k; - - // Update grid dims and k if we meet the minimum number of iterations worth the overhead of splitting - int min_iterations_k = 8; - - if (((new_split_k / block_tile_items_k) > min_iterations_k) && // We're going to go through at least this many k iterations - (sm_count * max_sm_occupancy < NumFlagsSplitK)) // We have enough semaphore flags allocated - { - grid_dims.z = num_partitions; - split_k = new_split_k; - } - } - - use_semaphore = (grid_dims.z > 1); - } - - - /** - * Initializer - */ - cudaError_t 
prepare( - cudaStream_t stream, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console if DEBUG is defined. Default is \p false. - - { - cudaError error = cudaSuccess; - - if (use_semaphore) - { - int block_threads = 128; - int grid_dims = (NumFlagsSplitK + block_threads - 1) / block_threads; - - prepare_kernel<<>>(d_flags); - - // Check for failure to launch - if (CUDA_PERROR_DEBUG(error = cudaPeekAtLastError())) - return error; - - // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CUDA_PERROR_DEBUG(error = cudaStreamSynchronize(stream)))) - return error; - } - - return error; - } - - - /** - * Compute the efficiency of dispatch wave quantization - */ - float get_wave_efficiency( - int sm_count, - int max_sm_occupancy, - dim3 block_dims, - dim3 grid_dims) - { - // Heuristic for how many warps are needed to saturate an SM for a given - // multiply-accumulate genre. (NB: We could make this more rigorous by - // specializing on data types and SM width) - int saturating_warps_per_sm = 16; - - int num_threadblocks = grid_dims.x * grid_dims.y * grid_dims.z; - int threads_per_threadblock = block_dims.x * block_dims.y; - int warps_per_threadblock = threads_per_threadblock / 32; - int saturating_threadblocks_per_sm = (saturating_warps_per_sm + warps_per_threadblock - 1) / warps_per_threadblock; - - int saturating_residency = sm_count * saturating_threadblocks_per_sm; - int full_waves = num_threadblocks / saturating_residency; - int remainder_threadblocks = num_threadblocks % saturating_residency; - int total_waves = (remainder_threadblocks == 0) ? 
full_waves : full_waves + 1; - - float last_wave_saturating_efficiency = float(remainder_threadblocks) / saturating_residency; - - return (float(full_waves) + last_wave_saturating_efficiency) / total_waves; - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/linear_scaling.h b/cutlass/gemm/linear_scaling.h new file mode 100644 index 00000000..05afaea1 --- /dev/null +++ b/cutlass/gemm/linear_scaling.h @@ -0,0 +1,86 @@ + +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements the BLAS linear scaling function alpha*AB + beta*C +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Functor to compute linear combination of fragments +template > +struct LinearScaling { + // The scalar. + typedef Scalar_ Scalar; + // The adapter used to multiply / multiply-add fragments by a scalar. + typedef FragmentMultiplyAdd_ FragmentMultiplyAdd; + + /// The parameters. + struct Params { + /// The alpha/beta scaling params. + Scalar alpha, beta; + + /// Initialize the parameters from desc.alpha and desc.beta. Always returns 0. + template + CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) { + alpha = desc.alpha; + beta = desc.beta; + return 0; + } + }; + + /// Ctor. + CUTLASS_DEVICE LinearScaling(Params const& params) : alpha(params.alpha), beta(params.beta) {} + + /// Evaluate the functor without a source fragment: multiply(alpha, accum) -> output. + template + CUTLASS_DEVICE void evaluate(Fragment_ const& accum, Fragment_& output) { + FragmentMultiplyAdd mad; + mad.multiply(alpha, accum, output); + } + + /// Evaluate the functor with a source fragment: multiply(beta, old), then multiply_add by alpha. + template + CUTLASS_DEVICE void evaluate(Fragment_ const& accum, Fragment_ const& old, Fragment_& output) { + FragmentMultiplyAdd mad; + Fragment_ tmp; + mad.multiply(beta, old, tmp); + mad.multiply_add(alpha, accum, tmp, output); + } + + /// The alpha/beta scaling factors. 
+ Scalar alpha, beta; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/sgemm_traits.h b/cutlass/gemm/sgemm_traits.h new file mode 100644 index 00000000..66b76774 --- /dev/null +++ b/cutlass/gemm/sgemm_traits.h @@ -0,0 +1,127 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of single-precision GEMM. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The tile size for the GEMM KxNxM. + typename OutputTile_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_ = 1, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_ = 1> +struct SgemmConfig + : public GemmConfig< + /// The scalar type for A. + float, + /// The scalar type for B. + float, + /// The scalar type for C. + float, + /// The scalar type for D. + float, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, float, float, float>, + /// The number of scalars per LDG for A. + kScalarsPerLdgA_, + /// The number of scalars per STS for A. + kScalarsPerLdgA_, + /// The number of scalars per LDS for A. + 4, + /// The number of scalars per LDG for B. + kScalarsPerLdgB_, + /// The number of scalars per STS for B. + kScalarsPerLdgB_, + /// The number of scalars per LDS for B. + 4, + /// The number of scalars per LDG for C and STG for D. 
+ 1, + /// The number of scalars per STS for D. + 4, + /// The number of scalars per LDS for D. + 1, + /// The number of stages in shared memory. + 2> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_ = Shape<8, 128, 128>, + /// The functor to use in the epilogue. + typename EpilogueFunctor_ = LinearScaling, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<8, 8, 8>, + /// The number of floats loaded in one LDG for A. + int kScalarsPerLdgA_ = 1, + /// The number of floats loaded in one LDG for B. + int kScalarsPerLdgB_ = 1, + /// The index. + typename Index_ = int, + /// The SGEMM config. + typename GemmConfig_ = + SgemmConfig, + /// The traits class for the epilogue. + typename GemmEpilogueTraits_ = + SimplifiedGemmEpilogueTraits > +struct SgemmTraits : public SimplifiedGemmTraits< + // The layout for A. + kLayoutA_, + // The layout for B. + kLayoutB_, + // The config. + GemmConfig_, + // The epilogue. + GemmEpilogue, + // The index. + Index_> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/thread_accumulator.h b/cutlass/gemm/thread_accumulator.h deleted file mode 100644 index 462e1894..00000000 --- a/cutlass/gemm/thread_accumulator.h +++ /dev/null @@ -1,469 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Thread-level multiply-accumulate abstraction - */ - -#include "../util/util.h" -#include "dp_accummulate.h" - - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * thread_accumulator (generic specialization) - ******************************************************************************/ - -/** - * \brief Thread-level multiply-accumulate abstraction (generic specialization) - * - * The thread_accumulator class maintains a MxN tile of accumulators in - * registers to which MxNxK matrix products of two thread tiles A (MxK) - * and B (KxN) can be added, where: - * M = ThreadItemsY - * N = ThreadItemsX - * K = sizeof(dp_vector_t) / sizeof(value_t). - * - * In order to leverage architecture-specific "dot-product accumulate" ISA - * operations, K is dictated by the thread_accumulator class in the form of - * the member-type dp_vector_t, which defines a K-component vector of value_t. - * The multiplicand inputs A and B are provided as arrays of dp_vector_t having - * extents ThreadItemsY and ThreadItemsX, respectively. (In the single - * component "dp1" scenario where dp_vector_t == value_t and thus K == 1, the - * multiplication is simply the outer product of two vectors.) - * - * The accumulators are zero-initialized in a two-phase process (construction + - * initialization) that requires shared storage in the form of the member-type - * scratch_storage_t during construction. (A single scratch_storage_t instance - * can be uniformly referenced across all threads in the block during - * construction *if* the block is synchronized between construction and - * initialization.) - * - * NB: This generic class is not directly constructible. Architecture- and - * algorithm-specific template specializations will provide the API - * functionality prescribed here. 
- */ -template < - int ThreadItemsY, ///< Height of thread tile in accum_t - int ThreadItemsX, ///< Width of thread tile in accum_t - typename value_t, ///< Multiplicand value type - typename accum_t, ///< Accumulator value type - int ACCUM_BYTES = ///< Size in bytes of accum_t - sizeof(accum_t), - arch_family_t::kind_t ArchFamily = ///< Architectural family enumerant - CUTLASS_ARCH_FAMILY> -struct thread_accumulator -{ -protected: - - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - /// Specialized dot-product traits type - typedef dp_accummulate dp_accum_traits_t; - - -public: - - //------------------------------------------------------------------------- - // Member types - //------------------------------------------------------------------------- - - /// Dot-product vector type - typedef typename dp_accum_traits_t::dp_vector_t dp_vector_t; - - /// Scratch storage layout - struct scratch_storage_t {}; - - -protected: - - //------------------------------------------------------------------------- - // Data members - //------------------------------------------------------------------------- - - /// Thread's tile of accumulators - accum_t accumulators[ThreadItemsY][ThreadItemsX]; - - - //------------------------------------------------------------------------- - // Utility methods - //------------------------------------------------------------------------- - - /** - * Compute a multiply-add at accumulator coordinates (x, y) - */ - inline __device__ - void mad_xy( - dp_vector_t (&tile_a)[ThreadItemsY], - dp_vector_t (&tile_b)[ThreadItemsX], - int x, - int y) - { - dp_accum_traits_t::mad( - accumulators[y][x], - tile_a[y], - tile_b[x], - accumulators[y][x]); - } - -public: - - //------------------------------------------------------------------------- - // Constructor API - 
//------------------------------------------------------------------------- - - /// Constructor - inline __device__ - thread_accumulator( - scratch_storage_t &scratch) - {} - - - //------------------------------------------------------------------------- - // Accumulator API - //------------------------------------------------------------------------- - - /** - * \brief Zero-initialize thread accumulators. - * - * If a common reference to a single block-wide shared instance of scratch_storage_t - * is used during construction, the block must be synchronized after construction - * but prior to the invocation of init(). - */ - inline __device__ - void init() - { - #pragma unroll - for (int y = 0; y < ThreadItemsY; ++y) { - #pragma unroll - for (int x = 0; x < ThreadItemsX; ++x) - { - accumulators[y][x] = accum_t(0); - } - } - } - - - /** - * Retrieve the accumulator at thread tile coordinates (x, y) - */ - inline __device__ - accum_t get(int x, int y) - { - // Accumulators are row-major - return accumulators[y][x]; - } - - - /** - * \brief Compute the product of tile_a and tile_b and add the result to - * the tile of accumulators. 
- */ - inline __device__ - void multiply_accumulate( - dp_vector_t (&tile_a)[ThreadItemsY], - dp_vector_t (&tile_b)[ThreadItemsX]) - { - // Simply traverse the accumulator tile in row-major order - #pragma unroll - for (int y = 0; y < ThreadItemsY; ++y) - { - #pragma unroll - for (int x = 0; x < ThreadItemsX; ++x) - { - mad_xy(tile_a, tile_b, x, y); - } - } - } -}; - - - - -/****************************************************************************** - * thread_accumulator (__half->__half specialization) - ******************************************************************************/ - -/** - * \brief Thread-level multiply-accumulate abstraction (__half->__half specialization) - * - * NB: Because we use the 2-item SIMD instruction HFMA2: - * - ThreadItemsX must be an even multiple of 2 - * - ThreadItemsY must be an even multiple of 2 - * - */ -template < - int ThreadItemsY, ///< Height in rows of thread tile in C - int ThreadItemsX, ///< Width in columns of thread tile in C - arch_family_t::kind_t ArchFamily> ///< Architectural family enumerant -struct thread_accumulator< - ThreadItemsY, - ThreadItemsX, - __half, ///< Multiplicand value type (matrices A and B) - __half, ///< Accumulator value type (matrix C and scalars) - 2, ///< Size in bytes of accum_t - ArchFamily> -{ -protected: - - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - /// Constants - enum - { - /// Height of thread tile in column-major uint32_t SIMD pairs along Y dimension - ThreadTilePairsY = divide_assert::value, - - /// Width of thread tile in column-major uint32_t SIMD pairs along X dimension - ThreadTilePairsX = ThreadItemsX, - - /// Number of SIMD pairs in thread's slice of block-wide tile multiplicand A - ThreadPairsA = divide_assert::value, - - /// Number of SIMD pairs in thread's slice of block-wide tile multiplicand B - ThreadPairsB = 
divide_assert::value, - }; - -public: - - //------------------------------------------------------------------------- - // Member types - //------------------------------------------------------------------------- - - /// Dot-product vector type - typedef __half dp_vector_t; - - /// Scratch storage layout - struct scratch_storage_t {}; - - -private: - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Thread's tile of C accumulator pairs (the uint32_t SIMD pairs are - /// column-major, the 2D tile layout is also column-major) - uint32_t accumulator_pairs[ThreadTilePairsX][ThreadTilePairsY]; - - - //------------------------------------------------------------------------- - // Utility methods - //------------------------------------------------------------------------- - - /** - * Compute an HFMA2 MAD - */ - inline __device__ void mad( - uint32_t &d, - const uint32_t &a, - const uint32_t &b, - const uint32_t &c) - { - - asm volatile ("fma.rn.f16x2 %0, %1, %2, %3;\n" - : "=r"(d) : "r"(a), "r"(b), "r"(c)); - } - - - /** - * Compute an HFMA2 MAD with replicated b.lo: - * d{hi} = a{hi} * b{lo} + c{hi}; - * d{lo} = a{lo} * b{lo} + c{lo}; - */ - inline __device__ void mad_replicate_low( - uint32_t &d, - const uint32_t &a, - const uint32_t &b, - const uint32_t &c) - { - // Replicate low halves of b - uint32_t replicate; - asm volatile ( - "{" - " .reg .b16 b_low,b_high;\n" - " mov.b32 {b_low,b_high}, %1;\n" - " mov.b32 %0, {b_low,b_low};\n" - "}" : "=r"(replicate) : "r"(b)); - - mad(d, a, replicate, c); - } - - - /** - * Compute an HFMA2 MAD with replicated b.hi: - * d{hi} = a{hi} * b{hi} + c{hi}; - * d{lo} = a{lo} * b{hi} + c{lo}; - */ - inline __device__ void mad_replicate_high( - uint32_t &d, - const uint32_t &a, - const uint32_t &b, - const uint32_t &c) - { - // Replicate high halves of b - uint32_t replicate; - asm volatile ( - "{" - " .reg 
.b16 b_low,b_high;\n" - " mov.b32 {b_low,b_high}, %1;\n" - " mov.b32 %0, {b_high,b_high};\n" - "}" : "=r"(replicate) : "r"(b)); - - mad(d, a, replicate, c); - } - - - /** - * Compute a multiply-add at accumulator SIMD-pair coordinates (pair_x, pair_y) - */ - inline __device__ - void mad_xy_even( - uint32_t (&pairs_tile_a)[ThreadPairsA], - uint32_t (&pairs_tile_b)[ThreadPairsB], - int pair_x, - int pair_y) - { - // Even column: use low half of the b pair - mad_replicate_low( - accumulator_pairs[pair_x][pair_y], - pairs_tile_a[pair_y], - pairs_tile_b[pair_x / 2], - accumulator_pairs[pair_x][pair_y]); - } - - - /** - * Compute a multiply-add at accumulator SIMD-pair coordinates (pair_x, pair_y) - */ - inline __device__ - void mad_xy_odd( - uint32_t (&pairs_tile_a)[ThreadPairsA], - uint32_t (&pairs_tile_b)[ThreadPairsB], - int pair_x, - int pair_y) - { - // Odd column: use high half of the b pair - mad_replicate_high( - accumulator_pairs[pair_x][pair_y], - pairs_tile_a[pair_y], - pairs_tile_b[pair_x / 2], - accumulator_pairs[pair_x][pair_y]); - } - - -public: - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - thread_accumulator( - scratch_storage_t &scratch) - {} - - - //------------------------------------------------------------------------- - // Accumulator API - //------------------------------------------------------------------------- - - /** - * Zero-initialize thread accumulators. 
- */ - inline __device__ - void init() - { - #pragma unroll - for (int y = 0; y < ThreadTilePairsY; ++y) - { - #pragma unroll - for (int x = 0; x < ThreadTilePairsX; ++x) - { - accumulator_pairs[x][y] = 0; - } - } - } - - - /** - * Retrieve the accumulator at thread tile coordinates (x, y) - */ - inline __device__ - __half get(int x, int y) - { - // SIMD pairs are column-major - uint32_t pair = accumulator_pairs[x][y / 2]; - - return reinterpret_cast<__half (&)[2]>(pair)[y % 2]; - } - - - /** - * \brief Compute the product of pairs_tile_a and pairs_tile_b and add the result to - * the tile of accumulators. - */ - inline __device__ - void multiply_accumulate( - dp_vector_t (&tile_a)[ThreadItemsY], - dp_vector_t (&tile_b)[ThreadItemsX]) - { - typedef uint32_t pairs_tile_a_t[ThreadPairsA]; - typedef uint32_t pairs_tile_b_t[ThreadPairsB]; - - // Alias slices in pairs - pairs_tile_a_t &pairs_tile_a = reinterpret_cast(tile_a); - pairs_tile_b_t &pairs_tile_b = reinterpret_cast(tile_b); - - // Simply traverse the accumulator tile in column-major order - #pragma unroll - for (int x = 0; x < ThreadTilePairsX; ++x) - { - #pragma unroll - for (int y = 0; y < ThreadTilePairsY; ++y) - { - mad_xy_even(pairs_tile_a, pairs_tile_b, x, y); - } - } - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/thread_multiply_add.h b/cutlass/gemm/thread_multiply_add.h new file mode 100644 index 00000000..20dca159 --- /dev/null +++ b/cutlass/gemm/thread_multiply_add.h @@ -0,0 +1,84 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Template implementing matrix multiply-add operations on fragments. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Template performing matrix multiply-add operation within a thread: one element-wise multiply-add per accumulator (see multiply_add) +template +struct ThreadMultiplyAdd { + /// The shape of the instruction. + typedef Shape<1, 1, 1, 1> InstructionShape; + /// The number of accumulators per thread. + typedef AccumulatorsPerThread_ AccumulatorsPerThread; + /// The number of threads per warp. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of accumulators per warp. 
+ typedef typename ShapeMul::Shape AccumulatorsPerWarp; + /// The type for A. + typedef ScalarA_ ScalarA; + /// The fragment for A. + typedef Fragment FragmentA; + /// The type for B. + typedef ScalarB_ ScalarB; + /// The fragment for B. + typedef Fragment FragmentB; + /// The type for C and D. + typedef ScalarC_ ScalarC; + /// The accumulators. + typedef Fragment Accumulators; + + /// Ctor. + CUTLASS_DEVICE ThreadMultiplyAdd() {} + + /// Multiply : d = a*b + c, i.e. d[j * kW + i] = a[i] * b[j] + c[j * kW + i] for j in [0, kH) and i in [0, kW) of AccumulatorsPerThread. + CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { + for (int j = 0; j < AccumulatorsPerThread::kH; ++j) { + for (int i = 0; i < AccumulatorsPerThread::kW; ++i) { + d[j * AccumulatorsPerThread::kW + i] = a[i] * b[j] + c[j * AccumulatorsPerThread::kW + i]; + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/wmma_accumulator.h b/cutlass/gemm/wmma_accumulator.h deleted file mode 100644 index dfd0f851..00000000 --- a/cutlass/gemm/wmma_accumulator.h +++ /dev/null @@ -1,215 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Thread-level multiply-accumulate abstraction - * (Volta 4B accum_t specialization) - */ - -#include - -#include "../util/util.h" -#include "dp_accummulate.h" - -namespace cutlass { -namespace gemm { - - -/*! 
- *\brief matrix_layout to perform conversion between Cutlass types and WMMA types - */ -template -struct matrix_layout; - -/// Maps matrix_transform_t::NonTranspose to nvcuda::wmma::mem_col_major -template <> -struct matrix_layout -{ - /// Type tag in nvcuda::wmma namespace - typedef nvcuda::wmma::col_major tag; - - /// Column major layout - static const nvcuda::wmma::layout_t kind = nvcuda::wmma::mem_col_major; - - /// Cutlass matrix transform kind - static const matrix_transform_t::kind_t cutlass_kind = matrix_transform_t::NonTranspose; -}; - -/// Maps matrix_transform_t::NonTranspose to nvcuda::wmma::mem_row_major -template <> -struct matrix_layout -{ - /// Type tag in nvcuda::wmma namespace - typedef nvcuda::wmma::row_major tag; - - /// Column major layout - static const nvcuda::wmma::layout_t kind = nvcuda::wmma::mem_row_major; - - /// Cutlass matrix transform kind - static const matrix_transform_t::kind_t cutlass_kind = matrix_transform_t::Transpose; -}; - -/*! - * \brief Warp-synchronous matrix multiply-accumulate abstraction - * - * wmma_accumulator maps the CUDA WMMA API onto the GEMM structure - */ -template < - int WarpItemsY, /// Number of rows of the warp's accumulator tile - int WarpItemsX, /// Number of columns of the warp's accumulator tile - int WmmaItemsY, /// Number of rows in a single WMMA operation - int WmmaItemsX, /// Number of columns in a single WMMA operation - int WmmaItemsK, /// Inner dimension of WMMA operation - typename value_a_t, /// Type of A operand - typename value_b_t, /// Type of B operand - typename accum_t, /// Type of source and destination accumulators - matrix_transform_t::kind_t TransformA, /// Layout of A operand - matrix_transform_t::kind_t TransformB /// Layout of B operand -> -struct wmma_accumulator -{ -public: - - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - enum - { - /// Number 
of WMMA blocks in warp row - WmmaBlocksX = divide_assert::value, - - /// Number of WMMA blocks in a warp column - WmmaBlocksY = divide_assert::value, - }; - - /// Fragment type for matrix operand A - typedef nvcuda::wmma::fragment< - nvcuda::wmma::matrix_a, - WmmaItemsY, - WmmaItemsX, - WmmaItemsK, - value_a_t, - typename matrix_layout::tag> - fragment_a_t; - - /// Fragment type for matrix operand B - typedef nvcuda::wmma::fragment< - nvcuda::wmma::matrix_b, - WmmaItemsY, - WmmaItemsX, - WmmaItemsK, - value_b_t, - typename matrix_layout::tag> - fragment_b_t; - - /// Fragment type for accumulator - typedef nvcuda::wmma::fragment< - nvcuda::wmma::accumulator, - WmmaItemsY, - WmmaItemsX, - WmmaItemsK, - accum_t> - accumulator_t; - - /// Scratch storage layout - struct scratch_storage_t - { - /// Initialization vector - uint4 zero_slab; - }; - -public: - //------------------------------------------------------------------------- - // Data members - //------------------------------------------------------------------------- - - /// Thread's tile of accumulators - accumulator_t accumulators[WmmaBlocksX][WmmaBlocksY]; - -public: - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor initializes accumulators to zero - inline __device__ - wmma_accumulator() - { - init(); - } - - - //------------------------------------------------------------------------- - // Accumulator API - //------------------------------------------------------------------------- - - /** - * \brief Zero-initialize thread accumulators. 
- */ - inline __device__ - void init() - { - #pragma unroll - for (int x = 0; x < WmmaBlocksX; ++x) - { - #pragma unroll - for (int y = 0; y < WmmaBlocksY; ++y) - { - nvcuda::wmma::fill_fragment(accumulators[x][y], accum_t(0)); - } - } - } - - /** - * \brief Compute the product of tile_a and tile_b and add the result to - * the tile of accumulators. - */ - inline __device__ - void multiply_accumulate( - fragment_a_t (&tile_a)[WmmaBlocksY], - fragment_b_t (&tile_b)[WmmaBlocksX]) - { - #pragma unroll - for (int x = 0; x < WmmaBlocksX; ++x) - { - #pragma unroll - for (int y = 0; y < WmmaBlocksY; ++y) - { - nvcuda::wmma::mma_sync(accumulators[x][y], tile_a[y], tile_b[x], accumulators[x][y]); - } - } - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/wmma_gemm_epilogue_traits.h b/cutlass/gemm/wmma_gemm_epilogue_traits.h new file mode 100644 index 00000000..0fafacf9 --- /dev/null +++ b/cutlass/gemm/wmma_gemm_epilogue_traits.h @@ -0,0 +1,161 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of WMMA GEMM's epilogue phase. +*/ +#pragma once + +#include +#ifdef CUTLASS_USE_WMMA_API + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmEpilogueTraitsHelper { + /// The scalar type used by the epilogue functor. + typedef typename EpilogueFunctor_::Scalar Scalar; + /// The output tile. + typedef typename GemmConfig_::OutputTile OutputTile; + + /// The number of WMMAs in the H dimension. + static int const kWmmasPerH = + GemmConfig_::AccumulatorsPerWarp::kH / GemmConfig_::InstructionShape::kH; + /// The number of iterations in the epilogue. That's the number of "horizontal" WMMAs. + typedef Shape<1, 1, kWmmasPerH> Iterations; + // The iteration strides in the H/W dimension. + typedef Shape<0, 0, 0> Delta; + /// The functor to do the math in the epilogue. + typedef EpilogueFunctor_ Functor; + + /// The traits class to build the iterator to store to shared memory for D. 
+ typedef WmmaGemmSharedStoreTileDTraits< + // The output layout. + MatrixLayout::kColumnMajor, + // The scalar type (taken from the epilogue functor). + typename Functor::Scalar, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedStoreTileTraits; + + typedef WmmaMatrix + WmmaMatrix; + + /// The iterator to store D to shared memory. + typedef TileStoreIterator + SharedStoreIteratorD; + + /// The shared store transformer for D. + typedef Copy SharedStoreTransformerD; + + /// The traits class to build the iterator to load from shared memory for D. + typedef WmmaGemmSharedLoadTileDTraits< + // The scalar type (taken from the epilogue functor). + typename Functor::Scalar, + // The tile size. + typename SharedStoreIteratorD::Tile, + // The number of threads. + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsD> + SharedLoadTileTraits; + + /// The iterator to load D from shared memory. + typedef TileLoadIterator + SharedLoadIteratorD; + + /// The traits class to build the iterator to load data from global memory for C^N. + typedef WmmaGemmGlobalIteratorCdTraits< + // The scalar type of C (const-qualified). + typename GemmConfig_::ScalarC const, + // The tile has size (N / Iterations)xM in GEMM's terminology. + Shape<1, + GemmConfig_::OutputTile::kH / ShapeCount::kCount, + GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x kWarpSize (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgC> + GlobalLoadTileTraits; + + /// The iterator to load C. + typedef WmmaGemmGlobalIteratorCd GlobalLoadIteratorC; + /// The transformer for C. 
+ typedef Copy GlobalTransformerC; + + /// The traits class to build the iterator to store data to global memory for D^N. 
+ typedef WmmaGemmGlobalIteratorCdTraits< + // The scalar type of D. + typename GemmConfig_::ScalarD, + // The tile has size (N / Iterations)xM in GEMM's terminology. + Shape<1, + GemmConfig_::OutputTile::kH / ShapeCount::kCount, + GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x kWarpSize (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per STG (STG.32 or STG.128, etc). + GemmConfig_::kScalarsPerStgD> + GlobalStoreTileTraits; + + /// The iterator to store D. + typedef WmmaGemmGlobalIteratorCd GlobalStoreIteratorD; + /// The transformer for D. + typedef Copy GlobalTransformerD; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass + +#endif // defined CUTLASS_USE_WMMA_API diff --git a/cutlass/gemm/wmma_gemm_global_tile.h b/cutlass/gemm/wmma_gemm_global_tile.h new file mode 100644 index 00000000..32d9759a --- /dev/null +++ b/cutlass/gemm/wmma_gemm_global_tile.h @@ -0,0 +1,203 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines tile iterator traits for loading thread block-level tile from global memory. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmGlobalIteratorCdTraits : public GemmGlobalTileTraits { + /// The base class. + typedef GemmGlobalTileTraits + Base; + + /// Override the strides in each dimension between different loads/stores. + typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> Delta; + + /// Computes the thread offset in (H, W) from threadIdx.x: H = threadIdx.x / Threads::kW, W = (threadIdx.x % Threads::kW) * ThreadsDelta::kW + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int thread_offset_h = threadIdx.x / Base::Threads::kW; + int thread_offset_w = threadIdx.x % Base::Threads::kW * Base::ThreadsDelta::kW; + + return make_Coord(0, thread_offset_h, thread_offset_w, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmGlobalIteratorCd : public TileIteratorBase { + /// This class. 
+ typedef WmmaGemmGlobalIteratorCd This_; + /// The traits. + typedef TileTraits_ Traits; + /// The base class. + typedef TileIteratorBase + Base; + /// Override the strides in each dimension between different loads/stores. + typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> ImmediateOffsetStrides; + /// The layout. + static MatrixLayout::Kind const kLayout = TileTraits_::kLayout; + + /// The scalar. + typedef typename TileTraits_::Scalar Scalar; + /// The pointer. + typedef typename TileTraits_::Pointer Pointer; + /// The threads. + typedef typename TileTraits_::Threads Threads; + /// The index. + typedef Index_ Index; + /// The thread offset functor. + typedef typename TileTraits_::ThreadOffset ThreadOffset; + + /// The params. + struct Params { + /// The pointer. + Pointer pointer; + /// The stride in the H dimension to setup the thread in the block. + Index stride_h; + /// The strides to increment the pointer. + Index inc_h, inc_advance; + /// The column offset to compute the predicate for the columns. + Index predicate_offset; + /// The strides to increment the predicate offset. + Index predicate_inc_h, predicate_inc_advance; + + /// Setup the params. Always returns 0. + CUTLASS_HOST_DEVICE int initialize( + Pointer pointer, Index ld, Index n, Index epilogue_stride_w, Index epilogue_delta_w) { + // The pointer. + this->pointer = pointer; + // Setup the base stride. One "group of threads" per column. + stride_h = ld; + // Each thread outputs 1 column per iteration, so one H step advances by Threads::kH leading dimensions. + inc_h = ld * TileTraits_::Threads::kH; + inc_advance = inc_h + epilogue_stride_w; + + predicate_offset = n; + predicate_inc_h = TileTraits_::Threads::kH; + predicate_inc_advance = predicate_inc_h + epilogue_delta_w; + + // It worked. + return 0; + } + }; + + Params params; + + Coord<4> thread_offset; + + /// Ctor. 
+ CUTLASS_DEVICE WmmaGemmGlobalIteratorCd(Params const& params, + const Coord<3>& bounds, + const Coord<3>& block, + int const pointer_offset = 0, + int const pred_offset = 0, + ThreadOffset thread_offset_func = ThreadOffset()) + + : params(params) { + thread_offset = thread_offset_func(); + // Each warp works on a different column of the tile. + int const h = thread_offset[1] + block[1]; + // Each lane writes a different element. + int const w = thread_offset[2] + block[2]; + // Setup the pointer. + this->params.pointer += ((h * params.stride_h + w) + pointer_offset); + + // Prepare the vector of predicates: entry i is set when w + i * Delta::kW is below bounds[2]. + for (int i = 0; i < Base::Iterations::kW; ++i) { + predicates.set(i, w + i * Base::Delta::kW < bounds[2]); + } + this->params.predicate_offset -= (h + pred_offset); + } + + /// Increment the pointer in the C dimension (no-op). + CUTLASS_DEVICE void inc_c() {} + /// Increment the pointer in the W dimension (no-op). + CUTLASS_DEVICE void inc_w() {} + /// Increment the pointer in the H dimension; also decrements the predicate offset. + CUTLASS_DEVICE void inc_h() { + params.pointer += params.inc_h; + params.predicate_offset -= params.predicate_inc_h; + } + /// Increment the pointer in the D dimension (no-op). + CUTLASS_DEVICE void inc_d() {} + /// Increment the pointer to move to the next iteration; also decrements the predicate offset. + CUTLASS_DEVICE void inc_advance() { + params.pointer += params.inc_advance; + params.predicate_offset -= params.predicate_inc_advance; + } + + /// Test the predicate: true when the predicate at index w is set and predicate_offset is still positive (d, h and c are not used). + CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { + return predicates.at(w) && params.predicate_offset > 0; + } + + /// Returns the raw pointer + CUTLASS_HOST_DEVICE + Pointer data() { return params.pointer; } + + CUTLASS_HOST_DEVICE + Pointer const data() const { return params.pointer; } + + /// The predicates for the row. 
+ cutlass::PredicateVector predicates; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/wmma_gemm_multiply_add.h b/cutlass/gemm/wmma_gemm_multiply_add.h new file mode 100644 index 00000000..5968350e --- /dev/null +++ b/cutlass/gemm/wmma_gemm_multiply_add.h @@ -0,0 +1,108 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements warp-level matrix multiply-accumulate operation using CUDA WMMA API. +*/ +#pragma once + +#include +#ifdef CUTLASS_USE_WMMA_API +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmMultiplyAdd { + /// The shape of the instruction. + typedef InstructionShape_ InstructionShape; + /// The number of threads per warp. That's a dummy configuration. + typedef Shape<1, InstructionShape_::kH, InstructionShape_::kW> ThreadsPerWarp; + /// The dimensions. + typedef AccumulatorsPerWarp_ AccumulatorsPerWarp; + /// The type for A. + typedef ScalarA_ ScalarA; + /// The type for B. + typedef ScalarB_ ScalarB; + /// The type for C and D. + typedef ScalarC_ ScalarC; + /// The number of iterations. + typedef typename ShapeDiv::Shape Iterations; + + /// The element for A. + typedef WmmaMatrix ElementA; + /// The fragment for A. + typedef Fragment FragmentA; + + /// The element for B. + typedef WmmaMatrix ElementB; + /// The fragment for B. + typedef Fragment FragmentB; + + /// The element for C. + typedef WmmaMatrix ElementC; + /// The fragment for C. + typedef Fragment Accumulators; + + /// Ctor. + CUTLASS_DEVICE WmmaGemmMultiplyAdd() {} + + /// Multiply-add : d = a*b+c, one WMMA operation per (j, i) pair of Iterations::kH x Iterations::kW. 
+ CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { + for (int j = 0; j < Iterations::kH; ++j) { + for (int i = 0; i < Iterations::kW; ++i) { + // The input elements: a is indexed by i, b by j, and c by the flattened (j, i) position. + ElementA const& elt_a = a[i]; + ElementB const& elt_b = b[j]; + ElementC const& elt_c = c[j * Iterations::kW + i]; + + // The output element, at the same flattened (j, i) position as elt_c. + ElementC& elt_d = d[j * Iterations::kW + i]; + + // The wmma instruction: elt_d receives elt_a * elt_b accumulated onto elt_c. + nvcuda::wmma::mma_sync(elt_d, elt_a, elt_b, elt_c); + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass + +#endif // defined CUTLASS_USE_WMMA_API diff --git a/cutlass/gemm/wmma_gemm_shared_tile.h b/cutlass/gemm/wmma_gemm_shared_tile.h new file mode 100644 index 00000000..7d15b260 --- /dev/null +++ b/cutlass/gemm/wmma_gemm_shared_tile.h @@ -0,0 +1,240 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines iterator traits for efficiently loading and storing fragment to and from shared + memory, specialized for WMMA GEMM. +*/ +#pragma once + +#include +#ifdef CUTLASS_USE_WMMA_API + +#include +#include + +namespace cutlass { +namespace gemm { + +template +struct Debug {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmSharedLoadTileATraits { + /// The operand. + static GemmOperand::Kind const kOperand = GemmOperand::kA; + /// The layout. + static MatrixLayout::Kind const kLayout = kLayout_; + /// The scalar. + typedef Scalar_ Scalar; + /// The pointer. + typedef Scalar const* Pointer; + /// The access size + static int const kAccessSize = 1; + /// The tile with skew. + typedef Tile_ Tile; + /// The number of warps. + typedef Warps_ Warps; + /// The warps strides. + static int const kWarpStride = kWarpStride_; + /// The number of iterations. + typedef Iterations_ Iterations; + /// The strides between iterations. + typedef Delta_ Delta; + /// The strides between iterations. 
+ typedef Delta_ ImmediateOffsetStrides; + /// The shape of the WMMA instruction. + typedef WmmaShape_ WmmaShape; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + /// ThreadOffset + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // The warp id. + int const warp = threadIdx.x / kWarpSize; + // The offset. + int const offset = warp % Warps::kW * kWarpStride; + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmSharedLoadTileBTraits { + /// The operand. + static GemmOperand::Kind const kOperand = GemmOperand::kB; + /// The layout. + static MatrixLayout::Kind const kLayout = kLayout_; + /// The scalar. + typedef Scalar_ Scalar; + /// The pointer. + typedef Scalar const* Pointer; + /// The access size + static int const kAccessSize = 1; + /// The tile with skew. + typedef Tile_ Tile; + /// The number of warps. + typedef Warps_ Warps; + /// The warps strides. + static int const kWarpStride = kWarpStride_; + /// The number of iterations. + typedef Iterations_ Iterations; + /// The strides between iterations. + typedef Delta_ Delta; + /// The strides between iterations. + typedef Delta_ ImmediateOffsetStrides; + /// The shape of the WMMA instruction. + typedef WmmaShape_ WmmaShape; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + /// ThreadOffset + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // The warp id. + int const warp = threadIdx.x / kWarpSize; + // The offset. + int const offset = warp / Warps::kW * kWarpStride; + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmSharedStoreTileDTraits { + /// The operand. 
+ static GemmOperand::Kind const kOperand = GemmOperand::kC; + /// The layout. + static MatrixLayout::Kind const kLayout = kLayout_; + /// The scalar. + typedef Scalar_ Scalar; + // The access size + static int const kAccessSize = 1; + /// The pointer. + typedef Scalar* Pointer; + /// The number of warps. + typedef Warps_ Warps; + /// The shape of the WMMA instruction. + typedef WmmaShape_ WmmaShape; + /// The skew. + static int const kSkew = kSkew_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + /// The tile with skew. + typedef Shape<1, Warps_::kH * WmmaShape_::kH, OutputTile_::kW + kSkew_> Tile; + /// The number of iterations needed to store the tile. + typedef Shape<1, 1, OutputTile_::kW / Warps::kW / WmmaShape_::kW> Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, 0, Warps::kW * WmmaShape_::kW, 0> Delta; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, 0, Warps::kW * WmmaShape_::kW, 0> ImmediateOffsetStrides; + + /// ThreadOffset + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // The warp id. + int const warp = threadIdx.x / kWarpSize; + // The starting column. + int const h = warp / Warps::kW * WmmaShape::kH; + // The w. + int const w = warp % Warps::kW * WmmaShape::kW; + // The offset. + int const offset = h * Tile::kW + w; + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmSharedLoadTileDTraits { + /// The scalar. + typedef Scalar_ Scalar; + /// The pointer. + typedef Scalar const* Pointer; + /// The access size + static int const kAccessSize = kScalarsPerLds_; + /// The tile. + typedef typename ReshapeTile::Tile Tile; + /// The threads. + typedef typename ReshapeThreads::Threads Threads; + /// The threads strides. 
+ typedef Shape<1, Tile::kW * Tile::kC, Tile::kC> ThreadsStrides; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The strides in each dimension between different loads/stores. + typedef Shape<0, Threads::kH * ShapeCount::kWc, Threads::kW * kScalarsPerLds_> Delta; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, Threads::kH * ShapeCount::kWc, Threads::kW * kScalarsPerLds_> + ImmediateOffsetStrides; + /// The number of iterations needed to load/store the tile. + typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kScalarsPerLds_> + Iterations; + + /// ThreadOffset + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // The offset. + int const offset = ComputeThreadOffsetFromStrides::get(); + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass + +#endif // defined CUTLASS_USE_WMMA_API diff --git a/cutlass/gemm/wmma_gemm_traits.h b/cutlass/gemm/wmma_gemm_traits.h new file mode 100644 index 00000000..79012016 --- /dev/null +++ b/cutlass/gemm/wmma_gemm_traits.h @@ -0,0 +1,574 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of GEMM targeting WMMA API in CUDA. +*/ +#pragma once + +#include +#ifdef CUTLASS_USE_WMMA_API + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The tile size for the GEMM KxNxM. + typename OutputTile_, + /// The output type. + typename ScalarC_, + /// The accumulator type. + typename Accumulator_, + /// The number of accumulators per warp. + typename AccumulatorsPerWarp_, + /// The shape of the WMMA instruction. + typename InstructionShape_, + /// The number of scalars per LDG for A. 
+ int kScalarsPerLdgA_, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_> +struct WmmaGemmConfig : public GemmConfig< + /// The scalar type for A. + half, + /// The scalar type for B. + half, + /// The scalar type for C. + ScalarC_, + /// The scalar type for D. + ScalarC_, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + WmmaGemmMultiplyAdd, + /// The number of scalars per LDG for A. + kScalarsPerLdgA_, + /// The number of scalars per STS for A. + kScalarsPerLdgA_, + /// The number of scalars per LDS for A. + 8, + /// The number of scalars per LDG for B. + kScalarsPerLdgB_, + /// The number of scalars per STS for B. + kScalarsPerLdgB_, + /// The number of scalars per LDS for B. + 8, + /// The number of scalars per LDG for C and STG for D. + 16 / sizeof(ScalarC_), + /// The number of scalars per STS for D. + 16 / sizeof(ScalarC_), + /// The number of scalars per LDS for D. + 16 / sizeof(ScalarC_), + /// The number of stages in shared memory. + 1> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperA {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperA + : public GemmTileTraitsHelperA { + /// The base config. + typedef GemmTileTraitsHelperA Base; + + /// The skew. + static int const kSkew = 16 / sizeof(typename Base::MultiplyAddScalar); + /// The shared tile size. + typedef Shape + Tile; + + /// WMMA matrix + typedef WmmaMatrix + WmmaMatrix; + + /// The traits class to build the iterator to store data to shared memory for A^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + typename Base::MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). 
+ typename Base::GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsA> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kW * GemmConfig_::Warps::kW; + /// The number of scalars loaded per iteration. + static int const kScalarsPerIteration = Tile::kW * GemmConfig_::InstructionShape::kD; + /// The traits class to build the iterator to load from shared memory for A. + typedef WmmaGemmSharedLoadTileATraits< + // The layout of the matrix. + MatrixLayout::kColumnMajor, + // The pointer. + typename Base::MultiplyAddScalar, + // The output tile size. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kW / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperA { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarA Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar; + + /// WMMA matrix + typedef WmmaMatrix + WmmaMatrix; + + /// The traits class to build the iterator to load data from global memory for A^T. + typedef GemmGlobalTileTraits< + // That's A. + GemmOperand::kA, + // A is row-major. + MatrixLayout::kRowMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxM in GEMM's terminology. 
+ Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgA> + GlobalTileTraits; + + /// The skew. + static int const kSkew = 16 / sizeof(MultiplyAddScalar); + /// The tile. + typedef Shape + Tile; + + /// The traits class to build the iterator to store data to shared memory for A^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsA> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kW * GemmConfig_::Warps::kW; + /// The traits class to build the iterator to load from shared memory for A. + typedef WmmaGemmSharedLoadTileATraits< + // The layout of the matrix. + MatrixLayout::kRowMajor, + // The pointer. + MultiplyAddScalar, + // The tile in shared memory. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kW * Tile::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kW / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. 
+ typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperB {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperB + : public GemmTileTraitsHelperB { + /// The base config. + typedef GemmTileTraitsHelperB Base; + + /// The skew. + static int const kSkew = 16 / sizeof(typename Base::MultiplyAddScalar); + /// The shared tile size. + typedef Shape + Tile; + + /// WMMA matrix + typedef WmmaMatrix + WmmaMatrix; + + /// The traits class to build the iterator to store data to shared memory for B^T. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + typename Base::MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename Base::GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsB> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kH * GemmConfig_::Warps::kH; + /// The number of scalars loaded per iteration. + static int const kScalarsPerIteration = Tile::kW * GemmConfig_::InstructionShape::kD; + /// The traits class to build the iterator to load from shared memory for B. + typedef WmmaGemmSharedLoadTileBTraits< + // The layout of the matrix. + MatrixLayout::kRowMajor, + // The pointer. + typename Base::MultiplyAddScalar, + // The output tile size. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kH, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kH / kScalarsPerW>, + // The stride between iterations. 
+ Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperB { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarB Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar; + + /// WMMA matrix + typedef WmmaMatrix + WmmaMatrix; + + /// The traits class to build the iterator to load data from global memory for B^N. + typedef GemmGlobalTileTraits< + // That's B. + GemmOperand::kB, + // B is column-major. + MatrixLayout::kColumnMajor, + // The pointer is Scalar const. + Scalar const, + // The tile has size KxN in GEMM's terminology (built from OutputTile::kH and OutputTile::kD). + Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgB> + GlobalTileTraits; + + /// The skew. + static int const kSkew = 16 / sizeof(MultiplyAddScalar); + /// The tile. + typedef Shape + Tile; + + /// The traits class to build the iterator to store data to shared memory for B^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsB> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. 
+ static int const kScalarsPerW = GemmConfig_::InstructionShape::kH * GemmConfig_::Warps::kH; + /// The traits class to build the iterator to load from shared memory for B. + typedef WmmaGemmSharedLoadTileBTraits< + // The layout of the matrix. + MatrixLayout::kColumnMajor, + // The pointer. + MultiplyAddScalar, + // The tile in shared memory. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kH * Tile::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kH / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_, + /// The output type. + typename ScalarC_, + /// The accumulator type. + typename Accumulator_, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_, + /// The number of accumulators per warp. + typename AccumulatorsPerWarp_, + /// The shape of the WMMA instruction. + typename InstructionShape_, + /// The number of halfs loaded in one LDG for A. + int kScalarsPerLdgA_, + /// The number of halfs loaded in one LDG for B. + int kScalarsPerLdgB_, + /// The index. + typename Index_> +struct WmmaGemmTraitsHelper { + /// The WMMA GEMM config. + typedef WmmaGemmConfig + GemmConfig; + + /// The GEMM config for A. + typedef WmmaGemmTileTraitsHelperA GemmTileTraitsHelperA; + /// The GEMM config for B. + typedef WmmaGemmTileTraitsHelperB GemmTileTraitsHelperB; + + /// The iterator to load A from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorA; + /// The default transformer for A. 
+ typedef Copy GlobalTransformerA; + /// The iterator to store A to shared memory. + typedef TileStoreIterator + SharedStoreIteratorA; + /// The stream to load A from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamA; + + /// The iterator to load B from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorB; + // The default transformer for B. + typedef Copy GlobalTransformerB; + /// The iterator to store B to shared memory. + typedef TileStoreIterator + SharedStoreIteratorB; + /// The stream to load B from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamB; + + /// The iterator to load A from shared memory. + typedef TileLoadIterator + SharedLoadIteratorA; + /// The stream to load A from shared memory. + typedef SharedLoadStream SharedLoadStreamA; + /// The iterator to load B from shared memory. + typedef TileLoadIterator + SharedLoadIteratorB; + /// The stream to load B from shared memory. + typedef SharedLoadStream SharedLoadStreamB; + + /// The functor to do the multiply-add in the main loop. + typedef typename GemmConfig::MultiplyAdd MultiplyAdd; + /// The object to clear accumulators. + typedef ClearAccumulators ClearAccumulators; + + /// The helper to create the epilogue traits. + typedef WmmaGemmEpilogueTraitsHelper EpilogueTraitsHelper; + /// The traits class for the epilogue. + typedef SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + /// The epilogue. + typedef GemmEpilogue Epilogue; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template > +struct WmmaGemmAccumulatorsPerWarp { + typedef typename ShapeMin::Shape Shape; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The tile size for the GEMM KxNxM. 
+ typename OutputTile_ = Shape<64, 128, 128>, + /// The output type. + typename ScalarC_ = float, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_ = LinearScaling, + /// The accumulator type. + typename Accumulator_ = ScalarC_, + /// The number of accumulators per warp. + typename AccumulatorsPerWarp_ = typename WmmaGemmAccumulatorsPerWarp::Shape, + /// The shape of the WMMA instruction. + typename InstructionShape_ = Shape<16, 16, 16>, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_ = 8, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_ = 8, + /// The index. + typename Index_ = int, + /// The helper class. + typename Helper_ = WmmaGemmTraitsHelper > +struct WmmaGemmTraits : public GemmTraits< + // The config. + typename Helper_::GemmConfig, + // The stream to load A from global memory to shared memory. + typename Helper_::GlobalLoadStreamA, + // The stream to load B from global memory to shared memory. + typename Helper_::GlobalLoadStreamB, + // The stream to load A from shared memory. + typename Helper_::SharedLoadStreamA, + // The stream to load B from shared memory. + typename Helper_::SharedLoadStreamB, + // The epilogue. + typename Helper_::Epilogue, + // The block swizzle to reorganize the grid. + IdentityBlockSwizzle, + // The index. + Index_, + // The tool used to clear accumulators. + typename Helper_::ClearAccumulators> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass + +#endif // defined CUTLASS_USE_WMMA_API diff --git a/cutlass/iterator_access.h b/cutlass/iterator_access.h new file mode 100644 index 00000000..db87e0d1 --- /dev/null +++ b/cutlass/iterator_access.h @@ -0,0 +1,325 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Free functions for loading and storing to implementations of tile iterator concepts. 
+*/ +#pragma once + +#include +#include +#include +#include + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Loads a fragment from an input iterator: accesses for which iterator.valid(d, h, w, c) is false are skipped, and the iterator is advanced (inc_w/inc_h/inc_d between iterations, inc_advance at the end) +template +CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment) { + typename InputIterator::FragmentIterator frag_iterator(fragment); + for (int d = 0; d < InputIterator::Iterations::kD; ++d) { + for (int h = 0; h < InputIterator::Iterations::kH; ++h) { + for (int w = 0; w < InputIterator::Iterations::kW; ++w) { + for (int c = 0; c < InputIterator::Iterations::kC; ++c) { + if (iterator.valid(d, h, w, c)) { + int const offset = + ComputeOffsetFromStrides::get( + 0, 0, w, c); + Load:: + load(reinterpret_cast( + frag_iterator.at(d, h, w, c)), + iterator.data(), + offset); + } + } + if (w < InputIterator::Iterations::kW - 1) { + iterator.inc_w(); + } + } + if (h < InputIterator::Iterations::kH - 1) { + iterator.inc_h(); + } + } + if (d < InputIterator::Iterations::kD - 1) { + iterator.inc_d(); + } + } + iterator.inc_advance(); +} + +/// Loads a fragment from a shared memory input iterator: every (d, h, w, c) access is read at a computed offset from iterator.data(), with no predicate test and without advancing the iterator +template +CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment) { + typename InputIterator::FragmentIterator frag_iterator(fragment); + for (int d = 0; d < InputIterator::Iterations::kD; ++d) { + for (int h = 0; h < InputIterator::Iterations::kH; ++h) { + for (int w = 0; w < InputIterator::Iterations::kW; ++w) { + for (int c = 0; c < InputIterator::Iterations::kC; ++c) { + int const offset = + ComputeOffsetFromStrides::get( + d, h, w, c); + + FragmentLoad::load(frag_iterator.at(d, h, w, c), + iterator.data(), + offset); + } + } + } + } +} + +/// Loads a single slice of a fragment from a shared memory input iterator: d selects the slice read from memory, and the data is written at index 0 along D of the fragment +template +CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment, int d) { + typename InputIterator::FragmentIterator frag_iterator(fragment); + for (int h = 0; h < 
InputIterator::Iterations::kH; ++h) { + for (int w = 0; w < InputIterator::Iterations::kW; ++w) { + for (int c = 0; c < InputIterator::Iterations::kC; ++c) { + int const offset = + ComputeOffsetFromStrides::get( + d, h, w, c); + + FragmentLoad::load(frag_iterator.at(0, h, w, c), + iterator.data(), + offset); + } + } + } +} + +/// Loads a fragment from an input iterator, masked by a predicate iterator +template +CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, + Fragment &fragment, + typename InputIterator::Index offset, + ConstPredicateAdapter predicate_adapter) { + for (int d = 0; d < InputIterator::Iterations::kD; ++d, iterator.inc_d()) { + for (int h = 0; h < InputIterator::Iterations::kH; ++h, iterator.inc_h()) { + for (int w = 0; w < InputIterator::Iterations::kW; ++w, iterator.inc_w()) { + if (predicate_adapter.at(d, h, w, 0)) { + int idx = InputIterator::Tile::kC * + (w + InputIterator::Iterations::kW * (h + InputIterator::Iterations::kH * d)); + + Load:: + load(reinterpret_cast(fragment[idx]), + iterator.data(), + offset); + } + } + } + } +} + +/// Loads a fragment from an input iterator +template +CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, + Fragment &fragment, + typename InputIterator::Index offset = 0) { + TrivialPredicateTileAdapter pred; + iterator_load_post_increment(iterator, fragment, offset, pred); +} + +/// Loads a fragment from an input iterator +template +CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, + Fragment &fragment, + ConstPredicateAdapter pred_it) { + iterator_load_post_increment(iterator, fragment, 0, pred_it); +} + +template +CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &_iterator, + Fragment &fragment, + typename InputIterator::Index offset, + ConstPredicateAdapter predicate_adapter) { + InputIterator iterator(_iterator); + iterator_load_post_increment(iterator, fragment, offset, predicate_adapter); +} + +/// Loads a 
fragment from an input iterator +template +CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator, + Fragment &fragment, + typename InputIterator::Index offset = 0) { + TrivialPredicateTileAdapter pred; + iterator_load(iterator, fragment, offset, pred); +} + +/// Loads a fragment from an input iterator +template +CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator, + Fragment &fragment, + ConstPredicateAdapter pred_it) { + iterator_load(iterator, fragment, 0, pred_it); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Stores a fragment to an output iterator +template +CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment) { + typename OutputIterator::FragmentIterator frag_iterator(fragment); + for (int d = 0; d < OutputIterator::Iterations::kD; ++d) { + for (int h = 0; h < OutputIterator::Iterations::kH; ++h) { + for (int w = 0; w < OutputIterator::Iterations::kW; ++w) { + if (iterator.valid(d, h, w, 0)) { + int const offset = + ComputeOffsetFromStrides::get( + d, h, w, 0); + + Store:: + store(reinterpret_cast( + frag_iterator.at(d, h, w, 0)), + iterator.data(), + offset); + } + if (w < OutputIterator::Iterations::kW - 1) { + iterator.inc_w(); + } + } + if (h < OutputIterator::Iterations::kH - 1) { + iterator.inc_h(); + } + } + if (d < OutputIterator::Iterations::kD - 1) { + iterator.inc_d(); + } + } + iterator.inc_advance(); +} + +/// Stores a fragment to a shared memory output iterator +template +CUTLASS_DEVICE void shared_iterator_store(OutputIterator &iterator, Fragment const &fragment) { + typename OutputIterator::FragmentConstIterator frag_iterator(fragment); + for (int d = 0; d < OutputIterator::Iterations::kD; ++d) { + for (int h = 0; h < OutputIterator::Iterations::kH; ++h) { + for (int w = 0; w < OutputIterator::Iterations::kW; ++w) { + for (int c = 0; c < OutputIterator::Iterations::kC; ++c) { + int const offset = + 
ComputeOffsetFromStrides::get( + d, h, w, c); + + FragmentStore::store(frag_iterator.at(d, h, w, c), + iterator.data(), + offset); + } + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Stores a fragment to an output iterator, masked by a predicate iterator +template +CUTLASS_HOST_DEVICE void iterator_store_post_increment(OutputIterator &iterator, + Fragment const &fragment, + typename OutputIterator::Index offset, + ConstPredicateAdapter predicate_adapter) { + for (int d = 0; d < OutputIterator::Iterations::kD; ++d, iterator.inc_d()) { + for (int h = 0; h < OutputIterator::Iterations::kH; ++h, iterator.inc_h()) { + for (int w = 0; w < OutputIterator::Iterations::kW; ++w, iterator.inc_w()) { + if (predicate_adapter.at(d, h, w, 0)) { + int idx = OutputIterator::Tile::kC * + (w + OutputIterator::Iterations::kW * (h + OutputIterator::Iterations::kH * d)); + + Store:: + store(reinterpret_cast(fragment[idx]), + iterator.data(), + offset); + } + } + } + } +} + +/// Stores a fragment to an output iterator +template +CUTLASS_HOST_DEVICE void iterator_store_post_increment(OutputIterator &iterator, + Fragment const &fragment, + typename OutputIterator::Index offset = 0) { + TrivialPredicateTileAdapter pred; + iterator_store_post_increment(iterator, fragment, offset, pred); +} + +/// Stores a fragment to an output iterator +template +CUTLASS_HOST_DEVICE void iterator_store_post_increment(OutputIterator &iterator, + Fragment const &fragment, + ConstPredicateAdapter pred_it) { + iterator_store_post_increment(iterator, fragment, 0, pred_it); +} + +/// Stores a fragment to an output iterator, masked by a predicate iterator +template +CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &_iterator, + Fragment const &fragment, + typename OutputIterator::Index offset, + ConstPredicateAdapter predicate_adapter) { + OutputIterator iterator(_iterator); + iterator_store_post_increment(iterator, fragment, 
offset, predicate_adapter); +} + +/// Stores a fragment to an output iterator +template +CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator, + Fragment const &fragment, + typename OutputIterator::Index offset = 0) { + TrivialPredicateTileAdapter pred; + iterator_store(iterator, fragment, offset, pred); +} + +/// Stores a fragment to an output iterator +template +CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator, + Fragment const &fragment, + ConstPredicateAdapter pred_it) { + iterator_store(iterator, fragment, 0, pred_it); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/load_store.h b/cutlass/load_store.h new file mode 100644 index 00000000..d3d0ce81 --- /dev/null +++ b/cutlass/load_store.h @@ -0,0 +1,199 @@ +/*************************************************************************************************** + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*! \file
+    \brief Defines abstractions for efficiently loading and storing vectors to memory.
+*/
+#pragma once
+
+#include
+
+namespace cutlass {
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Enum to specify which memory space data resides in.
+*
+* The struct only scopes the enumeration; the type is MemorySpace::Kind.
+*/
+struct MemorySpace {
+  enum Kind {
+    kGeneric,  // Data accessed through pointer dereferencing
+    kShared,   // Data resides in shared memory
+    kGlobal    // Data resides in global memory
+  };
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Generic load: reads one AccessType from the address of pointer[offset].
+/// `offset` counts Scalar_ elements, not bytes.
+template 1),
+          size_t = (sizeof(Scalar_) * Lanes_)>
+struct Load {
+  /// The output type.
+  typedef typename Vectorize::Type AccessType;
+
+  /// The load function.
+  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
+    dst = reinterpret_cast(&pointer[offset])[0];
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Specialization that fills dst.registers[0] with a single read from &pointer[offset].
+template
+struct Load {
+  /// The output type.
+  typedef typename Vectorize::Type AccessType;
+
+  /// The load function.
+  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
+    dst.registers[0] = reinterpret_cast(&pointer[offset])[0];
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Specialization that reads one uint2 from &pointer[offset] and unpacks it into
+/// dst.registers[0..1].
+template
+struct Load {
+  /// The output type.
+  typedef typename Vectorize::Type AccessType;
+
+  /// The load function.
+  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
+    uint2 tmp = reinterpret_cast(&pointer[offset])[0];
+    dst.registers[0] = tmp.x;
+    dst.registers[1] = tmp.y;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Specialization for double: reads one double2 from &pointer[offset] and unpacks it into
+/// dst[0] and dst[1].
+template
+struct Load {
+  /// The output type.
+  typedef typename Vectorize::Type AccessType;
+
+  /// The load function.
+  static CUTLASS_DEVICE void load(AccessType& dst, double const* pointer, int offset) {
+    double2 tmp = reinterpret_cast(&pointer[offset])[0];
+    dst[0] = tmp.x;
+    dst[1] = tmp.y;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Specialization that reads one uint4 from &pointer[offset] and unpacks it into
+/// dst.registers[0..3].
+template
+struct Load {
+  /// The output type.
+  typedef typename Vectorize::Type AccessType;
+
+  /// The load function.
+  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
+    uint4 tmp = reinterpret_cast(&pointer[offset])[0];
+    dst.registers[0] = tmp.x;
+    dst.registers[1] = tmp.y;
+    dst.registers[2] = tmp.z;
+    dst.registers[3] = tmp.w;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Generic store: assigns src to pointer[offset].
+/// `offset` counts Scalar_ elements, not bytes.
+template 1),
+          size_t = (sizeof(Scalar_) * Lanes_)>
+struct Store {
+  /// The type of the value being stored.
+  typedef typename Vectorize::Type AccessType;
+
+  /// The store function.
+  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
+    pointer[offset] = src;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Specialization that writes src.registers[0] as one uint32_t at &pointer[offset].
+template
+struct Store {
+  /// The type of the value being stored.
+  typedef typename Vectorize::Type AccessType;
+
+  /// The store function.
+  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
+    uint32_t* addr = reinterpret_cast(&pointer[offset]);
+    addr[0] = src.registers[0];
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Specialization that packs src.registers[0..1] into one uint2 written at &pointer[offset].
+template
+struct Store {
+  /// The type of the value being stored.
+  typedef typename Vectorize::Type AccessType;
+
+  /// The store function.
+  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
+    uint2* addr = reinterpret_cast(&pointer[offset]);
+    addr[0] = make_uint2(src.registers[0], src.registers[1]);
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Specialization for double: packs src[0] and src[1] into one double2 written at
+/// &pointer[offset].
+template
+struct Store {
+  /// The type of the value being stored.
+  typedef typename Vectorize::Type AccessType;
+
+  /// The store function.
+  static CUTLASS_DEVICE void store(AccessType const& src, double* pointer, int offset) {
+    double2* addr = reinterpret_cast(&pointer[offset]);
+    addr[0] = make_double2(src[0], src[1]);
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Specialization that packs src.registers[0..3] into one uint4 written at &pointer[offset].
+template
+struct Store {
+  /// The type of the value being stored.
+  typedef typename Vectorize::Type AccessType;
+
+  /// The store function.
+  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
+    uint4* addr = reinterpret_cast(&pointer[offset]);
+    addr[0] = make_uint4(src.registers[0], src.registers[1], src.registers[2], src.registers[3]);
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+}  // namespace cutlass
diff --git a/cutlass/matrix_traits.h b/cutlass/matrix_traits.h
new file mode 100644
index 00000000..77e8b706
--- /dev/null
+++ b/cutlass/matrix_traits.h
@@ -0,0 +1,48 @@
+/***************************************************************************************************
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright notice, this list of
+ *       conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright notice, this list of
+ *       conditions and the following disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
+ *       to endorse or promote products derived from this software without specific prior written
+ *       permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*! \file
+    \brief Defines properties of matrices used to denote layout and operands to GEMM kernels.
+*/
+#pragma once
+
+namespace cutlass {
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Describes layouts of matrices.
+///
+/// The struct only scopes the enumeration: the type is MatrixLayout::Kind and its values are
+/// MatrixLayout::kRowMajor and MatrixLayout::kColumnMajor.
+struct MatrixLayout {
+  enum Kind { kRowMajor, kColumnMajor };
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Gemm operand - D = A * B + C
+///
+/// The struct only scopes the enumeration: the type is GemmOperand::Kind, with one enumerator
+/// (kA, kB, kC, kD) per matrix appearing in the expression above.
+struct GemmOperand {
+  enum Kind { kA, kB, kC, kD };
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+}  // namespace cutlass
diff --git a/cutlass/predicate_vector.h b/cutlass/predicate_vector.h
new file mode 100644
index 00000000..81668577
--- /dev/null
+++ b/cutlass/predicate_vector.h
@@ -0,0 +1,493 @@
+/***************************************************************************************************
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright notice, this list of
+ *       conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines container classes and iterators for managing a statically sized vector + of boolean predicates. +*/ +#pragma once + +#include + +#include +#include + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup predicate_vector_concept Predicate Vector Concept +@{ + +Implementations of \ref predicate_vector_concept contain an ordered set of boolean predicates which +may be used as conditionals in other device-side operations. Both random access and iterators +offering sequential access are provided. 
+ +@par Predicate Vector + A \ref predicate_vector_concept satisfies the following expressions + - at(int idx) - returns the value of the indexed predicate + - set(int idx, bool value) - sets the value of the indexed predicate + - begin() - returns a \ref predicate_iterator_concept pointing to the first predicate + +@} +*/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup predicate_iterator_concept Predicate Iterator Concept +@{ + +Implementations of \ref predicate_iterator_concept enable accessing and traversing elements of a +bit vector. + +@par Const Predicate Iterator + A const \ref predicate_iterator_concept satisfies the following expressions + - ++it increments the iterator to the next predicate + - *it returns the value of the currently pointed-to predicate + +@par Mutable Predicate Iterator + A \ref predicate_iterator_concept that is non-const also satisfies the following expressions + - it.set(bool value) sets the value of the currently pointed-to predicate + +@} +*/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup predicate_tile_adapter Predicate Tile Adapter Concept +@{ + +Implementations of \ref predicate_tile_adapter provide a mapping between the elements of a \ref +tile_traits_concept and a \ref predicate_vector_concept. + +@par Predicate Tile Adapter + A \ref predicate_tile_adapter satisfies the following expressions + - at(int d, int h, int w, int c) - returns the value of a predicate corresponding to the + access (d, h, w, c) within the tile. + +@} +*/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Statically sized array of bits implementing @concept{predicate_vector_concept}.
+template <
+    /// Number of predicates contained in predicate vector
+    int kPredicates_,
+    /// Number of predicates contained in each byte of internal storage
+    int kPredicatesPerByte_ = 4,
+    /// Location of first predicate within byte of internal storage
+    int kPredicateStart_ = 0>
+struct PredicateVector {
+  /// Number of bits stored by the PredicateVector
+  static int const kPredicates = kPredicates_;
+
+  /// Number of bits stored within each byte of the predicate bit vector
+  static int const kPredicatesPerByte = kPredicatesPerByte_;
+
+  /// First bit within each byte containing predicates
+  static int const kPredicateStart = kPredicateStart_;
+
+  // Make sure no one tries to put more than 8 bits in a byte :)
+  static_assert(kPredicatesPerByte <= 8, "kPredicatesPerByte must fit within an actual byte");
+  // Make sure the offset bits fit in one byte. The predicates of a byte occupy bits
+  // [kPredicateStart, kPredicateStart + kPredicatesPerByte), so a sum of exactly 8 still fits.
+  static_assert(kPredicateStart + kPredicatesPerByte <= 8,
+                "The offsetted predicates must fit within an actual byte.");
+
+  /// Storage type of individual elements
+  typedef uint32_t Storage;
+
+  /// Number of bytes needed
+  static int const kBytes = (kPredicates + kPredicatesPerByte - 1) / kPredicatesPerByte;
+
+  /// Number of storage elements needed
+  static int const kWordCount = (kBytes + sizeof(Storage) - 1) / sizeof(Storage);
+
+ private:
+  //
+  // Data members
+  //
+
+  /// Words of bit vector
+  Storage storageData[kWordCount];
+
+  //
+  // Methods
+  //
+
+  /// Computes the word and bit corresponding to a logical predicate index.
+  ///
+  /// Predicate idx lives in byte (idx / kPredicatesPerByte) of the storage array, at bit
+  /// kPredicateStart + (idx % kPredicatesPerByte) of that byte; `bit` is returned relative to
+  /// the containing word.
+  CUTLASS_HOST_DEVICE void computeStorageOffset(int &word, int &bit, int idx) const {
+    CUTLASS_ASSERT(idx < kPredicates);
+
+    int byte = (idx / kPredicatesPerByte);
+    int bit_offset = (idx % kPredicatesPerByte);
+
+    word = byte / sizeof(Storage);
+    int byte_offset = (byte % sizeof(Storage));
+
+    bit = byte_offset * 8 + bit_offset + kPredicateStart;
+  }
+
+  /// Accesses a given word with optional assertions
+  CUTLASS_HOST_DEVICE Storage &storage(int word) {
+    CUTLASS_ASSERT(word < kWordCount);
+    return storageData[word];
+  }
+
+  /// Accesses a given word with optional assertions
+  CUTLASS_HOST_DEVICE Storage const &storage(int word) const {
+    CUTLASS_ASSERT(word < kWordCount);
+    return storageData[word];
+  }
+
+ public:
+  //
+  // Iterator
+  //
+
+  /**
+   * @brief A const iterator implementing \ref predicate_iterator_concept enabling sequential
+   * read-only access to predicates.
+   * @concept{predicate_iterator_concept}
+   */
+  class ConstIterator {
+    /// Reference to PredicateVector instance
+    PredicateVector const &vec_;
+
+    /// Index into PredicateVector
+    int bit_;
+
+   public:
+    /// Copy constructor
+    CUTLASS_HOST_DEVICE
+    ConstIterator(ConstIterator const &it) : vec_(it.vec_), bit_(it.bit_) {}
+
+    /// Constructs an iterator from a PredicateVector, positioned at predicate _start
+    CUTLASS_HOST_DEVICE
+    ConstIterator(PredicateVector const &_vec, int _start = 0) : vec_(_vec), bit_(_start) {}
+
+    /// Pre-increment
+    CUTLASS_HOST_DEVICE
+    ConstIterator &operator++() {
+      ++bit_;
+      return *this;
+    }
+
+    /// Pre-decrement
+    CUTLASS_HOST_DEVICE
+    ConstIterator &operator--() {
+      --bit_;
+      return *this;
+    }
+
+    /// Post-increment: advances this iterator and returns its previous position
+    CUTLASS_HOST_DEVICE
+    ConstIterator operator++(int) {
+      ConstIterator ret(*this);
+      // Advance *this, not the copy: the copy is the old position handed back to the caller.
+      ++bit_;
+      return ret;
+    }
+
+    /// Post-decrement: moves this iterator back and returns its previous position
+    CUTLASS_HOST_DEVICE
+    ConstIterator operator--(int) {
+      ConstIterator ret(*this);
+      // Move *this, not the copy: the copy is the old position handed back to the caller.
+      --bit_;
+      return ret;
+    }
+
+    /// Returns true if iterators point to the same bit (the referenced vectors are not compared)
+    CUTLASS_HOST_DEVICE
+    bool operator==(ConstIterator const &it) const { return bit_ == it.bit_; }
+
+    /// Returns false if iterators point to the same bit
+    CUTLASS_HOST_DEVICE
+    bool operator!=(ConstIterator const &it) const { return bit_ != it.bit_; }
+
+    /// Dereferences iterator
+    CUTLASS_HOST_DEVICE
+    bool operator*() const { return vec_[bit_]; }
+  };
+
+  /**
+   * @brief An iterator implementing \ref predicate_iterator_concept enabling sequential
+   * read and write access to predicates.
+   * @concept{predicate_iterator_concept}
+   */
+  class Iterator {
+    /// Reference to PredicateVector instance
+    PredicateVector &vec_;
+
+    /// Index into PredicateVector
+    int bit_;
+
+   public:
+    /// Copy constructor
+    CUTLASS_HOST_DEVICE
+    Iterator(Iterator const &it) : vec_(it.vec_), bit_(it.bit_) {}
+
+    /// Constructs an iterator from a PredicateVector, positioned at predicate _start
+    CUTLASS_HOST_DEVICE
+    Iterator(PredicateVector &_vec, int _start = 0) : vec_(_vec), bit_(_start) {}
+
+    /// Pre-increment
+    CUTLASS_HOST_DEVICE
+    Iterator &operator++() {
+      ++bit_;
+      return *this;
+    }
+
+    /// Pre-decrement
+    CUTLASS_HOST_DEVICE
+    Iterator &operator--() {
+      --bit_;
+      return *this;
+    }
+
+    /// Post-increment: advances this iterator and returns its previous position
+    CUTLASS_HOST_DEVICE
+    Iterator operator++(int) {
+      Iterator ret(*this);
+      // Advance *this, not the copy: the copy is the old position handed back to the caller.
+      ++bit_;
+      return ret;
+    }
+
+    /// Post-decrement: moves this iterator back and returns its previous position
+    CUTLASS_HOST_DEVICE
+    Iterator operator--(int) {
+      Iterator ret(*this);
+      // Move *this, not the copy: the copy is the old position handed back to the caller.
+      --bit_;
+      return ret;
+    }
+
+    /// Returns true if iterators point to the same bit (the referenced vectors are not compared)
+    CUTLASS_HOST_DEVICE
+    bool operator==(Iterator const &it) const { return bit_ == it.bit_; }
+
+    /// Returns false if iterators point to the same bit
+    CUTLASS_HOST_DEVICE
+    bool operator!=(Iterator const &it) const { return bit_ != it.bit_; }
+
+    /// Gets the bit at the pointed to location
+    CUTLASS_HOST_DEVICE
+    bool get() const { return vec_[bit_]; }
+
+    /// Dereferences iterator
+    CUTLASS_HOST_DEVICE
+    bool operator*() const { return vec_[bit_]; }
+
+    /// Sets the bit at the pointed to location
+    CUTLASS_HOST_DEVICE
+    void set(bool value = true) { vec_.set(bit_, value); }
+  };
+
+  /// Iterator that always returns true
+  struct TrivialIterator {
+    /// Constructor
+    CUTLASS_HOST_DEVICE
+    TrivialIterator() {}
+
+    /// Converting constructor from a mutable Iterator; the argument is ignored
+    CUTLASS_HOST_DEVICE
+    TrivialIterator(Iterator const &it) {}
+
+    /// Constructs an iterator from a PredicateVector; the argument is ignored
+    CUTLASS_HOST_DEVICE
+    TrivialIterator(PredicateVector const &_vec) {}
+
+    /// Pre-increment (no-op)
+    CUTLASS_HOST_DEVICE
+    TrivialIterator &operator++() { return *this; }
+
+    /// Post-increment (no-op)
+    CUTLASS_HOST_DEVICE
+    TrivialIterator operator++(int) { return *this; }
+
+    /// Dereferences iterator
+    CUTLASS_HOST_DEVICE
+    bool operator*() const { return true; }
+  };
+
+ public:
+  //
+  // Methods
+  //
+
+  /// Initialize the predicate vector
+  CUTLASS_HOST_DEVICE PredicateVector(bool value = true) { fill(value); }
+
+  /// Fills all predicates with a given value.
+  ///
+  /// Whole storage words are written, so with value == true the storage bits that back no
+  /// predicate are set as well.
+  CUTLASS_HOST_DEVICE void fill(bool value = true) {
+    Storage item = (value ? ~Storage(0) : Storage(0));
+
+    CUTLASS_PRAGMA_UNROLL
+    for (int i = 0; i < kWordCount; ++i) {
+      storage(i) = item;
+    }
+  }
+
+  /// Accesses a bit within the predicate vector.
+  CUTLASS_HOST_DEVICE bool operator[](int idx) const { return at(idx); }
+
+  /// Accesses a bit within the predicate vector.
+  CUTLASS_HOST_DEVICE bool at(int idx) const {
+    int bit, word;
+    computeStorageOffset(word, bit, idx);
+
+    return ((storage(word) >> bit) & 1);
+  }
+
+  /// Set a bit within the predicate vector.
+  CUTLASS_HOST_DEVICE void set(int idx, bool value = true) {
+    int bit, word;
+    computeStorageOffset(word, bit, idx);
+
+    // Clear the target bit, then OR in the new value at the same position.
+    Storage disable_mask = (~(Storage(1) << bit));
+    Storage enable_mask = (Storage(value) << bit);
+
+    storage(word) = ((storage(word) & disable_mask) | enable_mask);
+  }
+
+  /// Computes the intersection of two identical predicate vectors.
+  CUTLASS_HOST_DEVICE PredicateVector &operator&=(PredicateVector const &predicates) {
+    CUTLASS_PRAGMA_UNROLL
+    for (int i = 0; i < kWordCount; ++i) {
+      storage(i) = (storage(i) & predicates.storage(i));
+    }
+    return *this;
+  }
+
+  /// Computes the union of two identical predicate vectors.
+  CUTLASS_HOST_DEVICE PredicateVector &operator|=(PredicateVector const &predicates) {
+    CUTLASS_PRAGMA_UNROLL
+    for (int i = 0; i < kWordCount; ++i) {
+      storage(i) = (storage(i) | predicates.storage(i));
+    }
+    return *this;
+  }
+
+  /// Returns true if every one of the kPredicates predicates is false.
+  ///
+  /// fill(true) writes all-ones words, so storage bits that back no predicate (bits of a byte
+  /// outside [kPredicateStart, kPredicateStart + kPredicatesPerByte), and slots for indices at
+  /// or above kPredicates) may be set. They are masked off here so that they cannot make a
+  /// vector whose predicates are all false look non-zero.
+  CUTLASS_HOST_DEVICE bool is_zero() const {
+    int const kBytesPerWord = static_cast<int>(sizeof(Storage));
+    Storage result = 0;
+    for (int word = 0; word < kWordCount; ++word) {
+      // Mask of the bits of this word that hold a real predicate.
+      Storage mask(0);
+      for (int byte = 0; byte < kBytesPerWord; ++byte) {
+        // Index of the first predicate stored in this byte and number of its slots in use.
+        int const first = (word * kBytesPerWord + byte) * kPredicatesPerByte;
+        int count = kPredicates - first;
+        if (count > kPredicatesPerByte) {
+          count = kPredicatesPerByte;
+        }
+        if (count > 0) {
+          Storage byte_mask = (((Storage(1) << count) - 1) << kPredicateStart);
+          mask |= (byte_mask << (byte * 8));
+        }
+      }
+      result |= (storage(word) & mask);
+    }
+    return result == 0;
+  }
+
+  /// Returns an iterator to the start of the bit vector
+  CUTLASS_DEVICE
+  Iterator begin() { return Iterator(*this); }
+
+  /// Returns an iterator positioned one past the last predicate (index kPredicates)
+  CUTLASS_DEVICE
+  Iterator end() { return Iterator(*this, kPredicates); }
+
+  /// Returns a ConstIterator to the start of the bit vector
+  CUTLASS_DEVICE
+  ConstIterator const_begin() const { return ConstIterator(*this); }
+
+  /// Returns a ConstIterator positioned one past the last predicate (index kPredicates)
+  CUTLASS_DEVICE
+  ConstIterator const_end() const { return ConstIterator(*this, kPredicates); }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Always returns true predicate.
+struct TrivialPredicateTileAdapter {
+  /// Ctor.
+  CUTLASS_HOST_DEVICE TrivialPredicateTileAdapter() {}
+
+  /// The value at location (d, h, w, c).
+  CUTLASS_HOST_DEVICE bool at(int, int, int, int) const { return true; }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Adapter to enable random access to predicates via logical coordinate within a tile.
+/// Holds a reference to the predicate vector, which must outlive the adapter.
+template
+struct PredicateTileAdapter {
+  /// The vector of predicates.
+  typedef PredicateVector_ PredicateVector;
+  /// The iterations.
+  typedef Iterations_ Iterations;
+
+ private:
+  /// The predicates.
+  PredicateVector &predicates;
+
+ public:
+  /// Ctor.
+  CUTLASS_DEVICE PredicateTileAdapter(PredicateVector &predicates_) : predicates(predicates_) {}
+
+  /// Get the value at location (d, h, w, c).
+  CUTLASS_DEVICE bool at(int d, int h, int w, int c) const {
+    int const bit = ComputeOffsetFromShape::get(d, h, w, c);
+    return predicates.at(bit);
+  }
+
+  /// Set the value at location (d, h, w, c).
+  CUTLASS_DEVICE void set(int d, int h, int w, int c, bool value) {
+    int const bit = ComputeOffsetFromShape::get(d, h, w, c);
+    predicates.set(bit, value);
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Adapter to enable read-only random access to predicates via logical coordinate within a
+/// tile. Holds a const reference to the predicate vector, which must outlive the adapter.
+template
+struct ConstPredicateTileAdapter {
+  /// The vector of predicates.
+  typedef PredicateVector_ PredicateVector;
+  /// The iterations.
+  typedef Iterations_ Iterations;
+
+ private:
+  /// The predicates.
+  PredicateVector const &predicates;
+
+ public:
+  /// Ctor.
+  CUTLASS_DEVICE ConstPredicateTileAdapter(PredicateVector const &predicates_)
+      : predicates(predicates_) {}
+
+  /// Get the value at location (d, h, w, c).
+  CUTLASS_DEVICE bool at(int d, int h, int w, int c) const {
+    int const bit = ComputeOffsetFromShape::get(d, h, w, c);
+    return predicates.at(bit);
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+}  // namespace cutlass
diff --git a/cutlass/reshape_tile.h b/cutlass/reshape_tile.h
new file mode 100644
index 00000000..55aebfca
--- /dev/null
+++ b/cutlass/reshape_tile.h
@@ -0,0 +1,58 @@
+/***************************************************************************************************
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright notice, this list of
+ *       conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright notice, this list of
+ *       conditions and the following disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
+ *       to endorse or promote products derived from this software without specific prior written
+ *       permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*! \file
+    \brief Defines a type for restructuring a tile.
+*/
+#pragma once
+
+#include <cutlass/shape.h>
+
+namespace cutlass {
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// The following functor reshapes a tile of data. The goal is to have at least kAccessSize_ scalars
+// in the inner-most dimension (C). If the tile respects that constraint, it is returned unchanged.
+// Otherwise the specialization below "extracts" the missing scalars from the next dimension (W):
+// the resulting tile has kC = kAccessSize_ and kW divided by kAccessSize_.
+
+template <typename Tile_, int kAccessSize_, bool = (Tile_::kC < kAccessSize_)>
+struct ReshapeTile {
+  typedef Tile_ Tile;  // nothing to reshape: the tile is used as is
+};
+
+template <typename Tile_, int kAccessSize_>
+struct ReshapeTile<Tile_, kAccessSize_, true> {
+  // Make sure the W dimension of the tile is large enough.
+  static_assert(Tile_::kW >= kAccessSize_, "The W dimension is too small");
+  // Make sure the dimension can be divided by the number of scalars.
+  static_assert(Tile_::kW % kAccessSize_ == 0, "Not supported");
+  // Collapse the W dimension: move kAccessSize_ scalars from W into C.
+  typedef Shape<Tile_::kD, Tile_::kH, Tile_::kW / kAccessSize_, kAccessSize_> Tile;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+}  // namespace cutlass
diff --git a/cutlass/shape.h b/cutlass/shape.h
new file mode 100644
index 00000000..f0f63d9c
--- /dev/null
+++ b/cutlass/shape.h
@@ -0,0 +1,301 @@
+/***************************************************************************************************
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright notice, this list of
+ *       conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright notice, this list of
+ *       conditions and the following disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
+ *       to endorse or promote products derived from this software without specific prior written
+ *       permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*! \file
+    \brief Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
+*/
+#pragma once
+
+#include <cutlass/cutlass.h>
+
+namespace cutlass {
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/*!@defgroup layout_concept Layout Concept
+* @{
+* @par Implementations of \ref layout_concept are used to describe a cube with DxHxW elements and C
+scalars per element.
+   A HxW slice of a cube is called an image and a cube consists of D images.
+*
+* @par Notations
+*    Let Layout be an implementation of the \ref layout_concept.
+*
+* @par Valid Expressions
+* - Layout::D specifies the depth of a cube
+* - Layout::H specifies the height of a cube
+* - Layout::W specifies the width of a cube
+* - Layout::C specifies the number of channels of each element in a cube
+* - Layout::W_c specifies the number of scalars of each row in one image of a cube.
+* - Layout::H_w specifies the number of elements in an image slice.
+* - Layout::H_w_c specifies the number of scalars in an image slice.
+* - Layout::D_h_w specifies the number of elements in a cube.
+* - Layout::D_h_w_c specifies the number of scalars in a cube.
+* - Layout::Strides is a \ref layout_concept specifying the strides.
+* @}
+*/
+
+/**
+* @brief A Shape implementing \ref layout_concept describing the dimensions of a cube.
+* @concept{layout_concept}
+*/
+template <int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
+struct Shape {
+  /// The depth of the cube.
+  static int const kD = kD_;
+  /// The height of the cube.
+  static int const kH = kH_;
+  /// The width of the cube.
+  static int const kW = kW_;
+  /// The number of scalars per element.
+  static int const kC = kC_;
+};
+
+/**
+* @brief Compute derived counts of a \ref layout_concept based class
+*/
+template <typename Shape>
+struct ShapeCount {
+  /// The number of scalars per row (kW * kC).
+  static int const kWc = Shape::kW * Shape::kC;
+  /// The number of elements per image (kH * kW).
+  static int const kHw = Shape::kH * Shape::kW;
+  /// The number of scalars per image (kH * kW * kC).
+  static int const kHwc = Shape::kH * kWc;
+  /// The number of elements per cube (kD * kH * kW).
+  static int const kDhw = Shape::kD * kHw;
+  /// The number of scalars in the 4D space (kD * kH * kW * kC).
+  static int const kDhwc = Shape::kD * kHwc;
+  /// The total number of scalars; same value as kDhwc.
+  static int const kCount = kDhwc;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename A_, int kScale_>
+struct ShapeScale {  // every dimension of A_ multiplied by kScale_
+  typedef Shape<A_::kD * kScale_, A_::kH * kScale_, A_::kW * kScale_, A_::kC * kScale_> Shape;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename A_, typename B_>
+struct ShapeAdd {  // dimension-wise A_ + B_
+  typedef Shape<A_::kD + B_::kD, A_::kH + B_::kH, A_::kW + B_::kW, A_::kC + B_::kC> Shape;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename A_, typename B_>
+struct ShapeSub {  // dimension-wise A_ - B_
+  typedef Shape<A_::kD - B_::kD, A_::kH - B_::kH, A_::kW - B_::kW, A_::kC - B_::kC> Shape;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename A_, typename B_>
+struct ShapeMul {  // dimension-wise A_ * B_
+  typedef Shape<A_::kD * B_::kD, A_::kH * B_::kH, A_::kW * B_::kW, A_::kC * B_::kC> Shape;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename A_, typename B_>
+struct ShapeDiv {  // dimension-wise integer division A_ / B_
+  typedef Shape<A_::kD / B_::kD, A_::kH / B_::kH, A_::kW / B_::kW, A_::kC / B_::kC> Shape;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename A_, typename B_>
+struct ShapeMax {  // dimension-wise maximum of A_ and B_
+  typedef Shape<(A_::kD > B_::kD ? A_::kD : B_::kD),
+                (A_::kH > B_::kH ? A_::kH : B_::kH),
+                (A_::kW > B_::kW ? A_::kW : B_::kW),
+                (A_::kC > B_::kC ? A_::kC : B_::kC)>
+      Shape;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename A_, typename B_>
+struct ShapeMin {  // dimension-wise minimum of A_ and B_
+  typedef Shape<(A_::kD < B_::kD ? A_::kD : B_::kD),
+                (A_::kH < B_::kH ? A_::kH : B_::kH),
+                (A_::kW < B_::kW ? A_::kW : B_::kW),
+                (A_::kC < B_::kC ? A_::kC : B_::kC)>
+      Shape;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename Shape_>
+struct ShapeStrides {  // strides, in scalars, of a densely packed Shape_
+  typedef Shape<Shape_::kH * Shape_::kW * Shape_::kC, Shape_::kW * Shape_::kC, Shape_::kC, 1> Shape;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates in a cube
+* @tparam Shape_ A \ref layout_concept whose dimensions give the extents of the cube.
+*/
+template <typename Shape_>
+struct ComputeOffsetFromShape {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
+    // clang-format off
+    return d * Shape_::kH * Shape_::kW * Shape_::kC +
+           h * Shape_::kW * Shape_::kC +
+           w * Shape_::kC +
+           c;
+    // clang-format on
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates in a cube with a depth of 1 (d is ignored)
+* @tparam kSh_ Elements in the H dimension
+* @tparam kSw_ Elements in the W dimension
+* @tparam kSc_ Number of scalars per element
+*/
+template <int kSh_, int kSw_, int kSc_>
+struct ComputeOffsetFromShape<Shape<1, kSh_, kSw_, kSc_> > {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
+    return h * kSw_ * kSc_ + w * kSc_ + c;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates in a cube with one channel and a depth of 1
+* @tparam kSh_ Elements in the H dimension
+* @tparam kSw_ Elements in the W dimension
+*/
+template <int kSh_, int kSw_>
+struct ComputeOffsetFromShape<Shape<1, kSh_, kSw_, 1> > {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) { return h * kSw_ + w; }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates in a cube
+* @tparam Strides_ A \ref layout_concept whose dimensions give the stride of each coordinate.
+*/
+template <typename Strides_>
+struct ComputeOffsetFromStrides {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
+    return d * Strides_::kD + h * Strides_::kH + w * Strides_::kW + c * Strides_::kC;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates in a cube with a depth of 1 (d is ignored)
+* @tparam S_h_ Stride in the H dimension in scalars
+* @tparam S_w_ Stride in the W dimension in scalars
+* @tparam S_c_ Stride between two scalars.
+*/
+template <int S_h_, int S_w_, int S_c_>
+struct ComputeOffsetFromStrides<Shape<1, S_h_, S_w_, S_c_> > {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
+    return h * S_h_ + w * S_w_ + c * S_c_;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates in a cube with one channel and a depth of 1
+* @tparam S_h_ Stride in the H dimension in scalars
+* @tparam S_w_ Stride in the W dimension in scalars
+*/
+template <int S_h_, int S_w_>
+struct ComputeOffsetFromStrides<Shape<1, S_h_, S_w_, 1> > {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) { return h * S_h_ + w * S_w_; }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Decompose threadIdx.x into coordinates of a cube whose dimensions are specified by Threads_.
+*        Afterwards compute the offset of those coordinates using Strides_
+* @tparam Threads_ The dimension of the cube the threadIdx.x value is mapped on
+* @tparam Strides_ The strides to use when computing the offsets based on the coordinates of the cube.
+*/ +template +struct ComputeThreadOffsetFromStrides { + static CUTLASS_DEVICE int get() { + // Decompose the thread index. + int c = threadIdx.x % Threads_::kC; + int w = threadIdx.x / Threads_::kC % Threads_::kW; + int h = threadIdx.x / Threads_::kC / Threads_::kW % Threads_::kH; + int d = threadIdx.x / Threads_::kC / Threads_::kW / Threads_::kH; + + // Compute the offset. + return d * Strides_::kD + h * Strides_::kH + w * Strides_::kW + c * Strides_::kC; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +/** +*@brief Specialization for D=1 +*/ +template +struct ComputeThreadOffsetFromStrides, Shape<1, S_h_, S_w_, S_c_> > { + static CUTLASS_DEVICE int get() { + // Decompose the thread index. + int c = threadIdx.x % T_c_; + int w = threadIdx.x / T_c_ % T_w_; + int h = threadIdx.x / T_c_ / T_w_ % T_h_; + + // Compute the offset. + return h * S_h_ + w * S_w_ + c * S_c_; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +*@brief Specialization for D=1 and C=1 +*/ +template +struct ComputeThreadOffsetFromStrides, Shape<1, S_h_, S_w_, 1> > { + static CUTLASS_DEVICE int get() { + // Decompose the thread index. + int w = threadIdx.x % T_w_; + int h = threadIdx.x / T_w_; + + // Compute the offset. + return h * S_h_ + w * S_w_; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/tensor_ref.h b/cutlass/tensor_ref.h new file mode 100644 index 00000000..8ef31e3b --- /dev/null +++ b/cutlass/tensor_ref.h @@ -0,0 +1,151 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines a structure containing strides, bounds, and a pointer to tensor data. 
+*/ +#pragma once + +#include + +#include +#include +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Structure modeling a pointer and stride into a tensor +template +class TensorRef { + public: + /// Data type of individual access + typedef Storage_ Storage; + + /// Rank of tensor + static int const Rank = Rank_; + + private: + // + // Data members + // + + /// Pointer to storage element + Storage* ptr_; + + /// Stride information + Coord stride_; + + public: + // + // Methods + // + + /// Default ctor + CUTLASS_HOST_DEVICE + TensorRef() : ptr_(nullptr) {} + + /// Constructs from a pointer, size, and stride + CUTLASS_HOST_DEVICE + TensorRef(Storage* ptr, Coord stride) : ptr_(ptr), stride_(stride) {} + + /// Updates the pointer, stride, and location within a TensorRef + CUTLASS_HOST_DEVICE + void reset(Storage* ptr = nullptr, Coord stride = Coord(0)) { + ptr_ = ptr; + stride_ = stride; + } + + /// Conversion function + template + TensorRef convert() { + Coord converted_stride; + for (int i = 0; i < Rank - 1; ++i) { + converted_stride[i] = stride_[i] * Extent::kValue / Extent::kValue; + } + converted_stride[Rank - 1] = stride_[Rank - 1]; + + return TensorRef(reinterpret_cast(ptr_), converted_stride); + } + + /// Returns true if the TensorRef may be safely accessed + CUTLASS_HOST_DEVICE + bool good() const { return ptr_ != nullptr; } + + /// Returns the pointer to referenced data + CUTLASS_HOST_DEVICE + Storage* data() const { return ptr_; } + + /// Returns the stride of the tensor + CUTLASS_HOST_DEVICE + Coord const& stride() const { return stride_; } + + /// Returns the stride of the tensor in the given dimension + CUTLASS_HOST_DEVICE + int const& stride(int dim) const { return stride_.at(dim); } + + /// Returns the maximum stride element as the 'leading dimension' + CUTLASS_HOST_DEVICE + int leading_dim() const { return __NV_STD_MAX(stride_[1], stride_[2]); } + + /// 
Computes the offset of an index from the origin of the tensor + CUTLASS_HOST_DEVICE + long long offset(Coord const& coord) const { + return stride_.template dot(coord); + } + + /// Returns a reference to the element at a given Coord + CUTLASS_HOST_DEVICE + Storage& at(Coord const& coord) const { return ptr_[offset(coord)]; } + + /// Element-wise accessor + Storage& operator[](Coord const& coord) const { return at(coord); } + + /// Returns a reference to the element at a given Coord + CUTLASS_HOST_DEVICE + Storage& at(int idx) const { return ptr_[idx]; } + + /// Element-wise accessor + Storage& operator[](int idx) const { return at(idx); } + + /// Adds an offset to the pointer + CUTLASS_HOST_DEVICE + TensorRef& advance(Coord const& b) { + ptr_ += offset(b); + return *this; + } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorRef operator+(Coord const& b) const { return TensorRef(ptr_ + offset(b), stride_); } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorRef operator-(Coord const& b) const { return TensorRef(ptr_ - offset(b), stride_); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/tensor_view.h b/cutlass/tensor_view.h new file mode 100644 index 00000000..89c6bd57 --- /dev/null +++ b/cutlass/tensor_view.h @@ -0,0 +1,172 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines a structure containing strides and a pointer to tensor data. 
+*/ + +#pragma once + +#include + +#include +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Host-side reference implementation of tensor operations +template +class TensorView : public TensorRef { + public: + /// Reference and stride + typedef TensorRef Base; + + /// Reference and stride + typedef Base TensorRef_t; + + /// Reference to constant type + typedef TensorRef ConstTensorRef_t; + + /// Rank of tensor + static int const Rank = TensorRef_t::Rank; + + /// Type used to compute the offset of an element to the base of a tensor + typedef int Offset_t; + + /// Coordinate into tensor + typedef Coord Coord_t; + + private: + // + // Data members + // + + /// Pointer to pitch-linear memory + TensorRef_t ref_; + + /// Dimensions of coordinate (independent of stride) + Coord_t size_; + + public: + // + // Device and Host Methods + // + + /// Default constructor + CUTLASS_HOST_DEVICE + TensorView() {} + + /// Constructs a Tensor_view from a TensorRef and size + CUTLASS_HOST_DEVICE + TensorView(TensorRef_t const& _ref, Coord_t const& _size) : Base(_ref), size_(_size) {} + + /// Returns true if the Tensor_view is bound to some memory + CUTLASS_HOST_DEVICE + bool good() const { return ref().good(); } + + /// Returns a pointer to data + CUTLASS_HOST_DEVICE + T* data() const { return ref().data(); } + + /// Updates the reference and size of a Tensor_view object + CUTLASS_HOST_DEVICE + void reset(TensorRef_t const& _ref = TensorRef_t(0), Coord_t const& _size = Coord_t()) { + Base::operator=(_ref); + size_ = _size; + } + + /// Accesses the tensor reference pointing to data + CUTLASS_HOST_DEVICE + TensorRef_t& ref() { return *this; } + + /// + CUTLASS_HOST_DEVICE + ConstTensorRef_t const_ref() { return ConstTensorRef_t(data(), stride()); } + + /// Accesses the tensor reference pointing to data + CUTLASS_HOST_DEVICE + TensorRef_t const& ref() const { return *this; } + + /// Accesses 
the size + CUTLASS_HOST_DEVICE + Coord_t const& size() const { return size_; } + + /// Accesses the size + CUTLASS_HOST_DEVICE + int size(int dim) const { return size_.at(dim); } + + /// Accesses the stride + CUTLASS_HOST_DEVICE + Coord_t const& stride() const { return ref().stride(); } + + /// Accesses the stride + CUTLASS_HOST_DEVICE + int const& stride(int dim) const { return ref().stride(dim); } + + /// Assigns the Tensor_view + CUTLASS_HOST_DEVICE + TensorView& operator=(TensorView const& _tensor) { + Base::operator=(_tensor._ref); + size_ = _tensor.size_; + return *this; + } + + /// Returns the index of an element + CUTLASS_HOST_DEVICE + Offset_t offset(Coord_t const& coord) const { return ref().offset(coord); } + + /// Determines whether a location is within a tensor + CUTLASS_HOST_DEVICE + bool contains(Coord_t const& coord) const { + for (int dim = 0; dim < Rank; ++dim) { + if (coord.at(dim) >= size_.at(dim)) { + return false; + } + } + return true; + } + + /// Element-wise accessor + CUTLASS_HOST_DEVICE + T& at(Coord_t const& coord) const { return ref().at(coord); } + + /// Element-wise accessor + T& operator[](Coord const& coord) const { return at(coord); } + + /// Element-wise accessor + CUTLASS_HOST_DEVICE + T& at(Offset_t idx) const { return ref().at(idx); } + + /// Returns a Tensor_view given location and size quantities + CUTLASS_HOST_DEVICE + TensorView subview(Coord_t const& location, Coord_t size) const { + return TensorView(ref() + location, size.clamp(size_ - location)); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/tile_iterator.h b/cutlass/tile_iterator.h new file mode 100644 index 00000000..6543cebf --- /dev/null +++ b/cutlass/tile_iterator.h @@ -0,0 +1,881 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines the Tile Traits concept and iterators for loading and storing to tiles + efficiently. 
+*/ +#pragma once + +#include +#include +#include +#include + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup tile_traits_concept Tile Traits Concept +@{ + +\ref tile_traits_concept is a type definining the shape of a tile and the distribution of accesses +by individual entities, either threads or other. + +@par Tile Traits Concept + Types satisfying \ref tile_traits_concept define the following members + - Tile - a type satisfying \ref layout_concept describing the dimensions of the tile + - Delta - a type satisfying \ref layout_concept describing the increments between accesses +along each dimension + - Iterations - a type satisfying \ref layout_concept describing the number of accesses +along each dimension + - Offset - the type of a functor computing the offset of each participating entity +as a Coord<4>. +@} +*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Specifies dimension in which post-increment accesses advance +struct IteratorAdvance { + enum Kind { kD, kH, kW }; +}; + +/// Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix +struct IteratorFragment { + enum Kind { kScalar, kWmmaMatrix }; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +* @brief A template defining \ref tile_traits_concept +* @concept{tile_traits_concept} +*/ +template +struct TileTraits { + /// Shape of the tile + typedef Tile_ Tile; + + /// Number of steps between accesses along each dimension + typedef Delta_ Delta; + + /// Number of accesses performed + typedef Iterations_ Iterations; + + /// Functor that returns the logical coordinate of each entity's initial offset in the tile + typedef ThreadOffset_ ThreadOffset; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// 
Iterator for accessing a stripmined tile in memory +template > +struct TileIteratorBase { + /// concept TileTraits + typedef Traits_ Traits; + + /// Scalar element + typedef Scalar_ Scalar; + + /// Fragment element + typedef FragmentElement_ FragmentElement; + + /// Specifies dimension in which post-increment accesses advance. + static IteratorAdvance::Kind const kAdvance = Advance_; + + /// Specifies iterator storage fragment type (Scalar or WmmaMatrix) + static IteratorFragment::Kind const kIteratorFragment = IteratorFragment_; + + /// Source or destination memory space + static MemorySpace::Kind const kMemorySpace = MemorySpace; + + /// Index type + typedef Index_ Index; + + /// Skew quantity + typedef Skew_ Skew; + + /// Tile shape + typedef typename Traits::Tile Tile; + + /// Distance along each dimension + typedef typename Traits::Delta Delta; + + /// The strides in each dimension between different loads/stores. + typedef typename Traits::ImmediateOffsetStrides ImmediateOffsetStrides; + + /// Iterations + typedef typename Traits::Iterations Iterations; + + /// Thread offset + typedef typename Traits::ThreadOffset ThreadOffset; + + /// The number of scalars accessed per load/store. + static int const kAccessSize = Tile::kC; + + /// The elements loaded/store by one instruction. + typedef typename Vectorize::Type AccessType; + + /// The size of storage needed per fragment + static int const kFragmentSize = + (kIteratorFragment == IteratorFragment::kWmmaMatrix ? 16 : sizeof(AccessType)); + /// The storage. + typedef Fragment::kCount, kFragmentSize> Storage; + /// The fragment. + typedef Fragment::kCount * kAccessSize> Fragment; + /// The fragment iterator. + typedef FragmentIterator FragmentIterator; + /// The fragment const iterator. + typedef FragmentConstIterator FragmentConstIterator; + /// The shape of the fragment. 
+ typedef typename FragmentIterator::FragmentShape FragmentShape; + + /// Default predicate mask type + typedef PredicateVector::kCount> PredicateVector; + + // + // Params struct + // + + /// Parameters to the iterator + struct Params { + Index stride_d; + Index stride_h; + Index stride_w; + + Index inc_d; + Index inc_h; + Index inc_w; + + Index inc_advance; + + /// Initializes params + CUTLASS_HOST_DEVICE + int initialize(Index _stride_d, + Index _stride_h, + Index _stride_w, + Index _inc_d, + Index _inc_h, + Index _inc_w, + Index _inc_advance) { + stride_d = _stride_d; + stride_h = _stride_h; + stride_w = _stride_w; + + inc_d = _inc_d; + inc_h = _inc_h; + inc_w = _inc_w; + inc_advance = _inc_advance; + + return 0; + } + + CUTLASS_HOST_DEVICE + int initialize(Index _stride_d, Index _stride_h, Index _stride_w) { + stride_d = _stride_d; + stride_h = _stride_h; + stride_w = _stride_w; + + inc_w = stride_w * Delta::kW; + inc_h = stride_h * Delta::kH - stride_w * Delta::kW * (Iterations::kW - 1); + + if (kAdvance == IteratorAdvance::kH) { + // Advance in the H dimension. + inc_d = 0; + } else if (kAdvance == IteratorAdvance::kW) { + // Advance in the W dimension. + inc_d = stride_w * Tile::kW - stride_h * Tile::kH; + } else { + // Advance in the D dimension. + inc_d = stride_d; + } + + inc_advance = 0; + + return 0; + } + + CUTLASS_HOST_DEVICE int initialize() { + stride_d = 0; + stride_h = 0; + stride_w = 1; + + inc_d = inc_h = inc_w = inc_advance = 0; + + return 0; + } + }; + + /// Is the iterator valid? 
+ CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; } + + // + // Static function members + // + + /// Initializes a predicate vector + template + CUTLASS_DEVICE static void initialize_predicates(PredicateIterator predicate_it, + Coord<3> const &bounds, + Coord<3> const &offset = make_Coord(0, 0, 0)) { + for (int d = 0; d < Iterations::kD; ++d) { + bool enable_d = (d * Delta::kD + offset[0] < bounds[0]); + for (int h = 0; h < Iterations::kH; ++h) { + bool enable_h = (h * Delta::kH + offset[1] < bounds[1]); + for (int w = 0; w < Iterations::kW; ++w) { + bool enable_w = (w * Tile::kC * Delta::kW + offset[2] < bounds[2]); + predicate_it.set(d, h, w, 0, enable_d && enable_h && enable_w); + } + } + } + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup tile_load_iterator_concept Tile Load Iterator Concept +@{ + +\ref tile_load_iterator_concept enables loading a tile from addressable memory into a fragment + +@par Tile Load Iterator Concept + Types satisfying \ref tile_load_iterator_concept define the following members + - PredicateVector - a \ref predicate_vector_concept with sufficient predicate storage for +each access implied by the tile traits + - Fragment - the destination fragment type satisfying \ref fragment_concept + - initialize_predicates(pred_it, bounds, block_offset) - function initializing a predicate +vector according to externally specified bounds + - load_post_increment(fragment, pred_it) - a method that loads a fragment and increments +the iterator to the next tile, guarded by a \ref predicate_iterator_concept + - load_post_increment(fragment) - a method that loads a fragment and increments the +iterator to the next tile + - load(fragment, pred_it) - a const method that loads a fragment, guarded by a \ref +predicate_iterator_concept + - load(fragment) - a method that loads a fragment + +@} +*/ + 
+/////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +* @brief An iterator implementing \ref tile_load_iterator_concept for loading a tile from memory +* @concept{tile_load_iterator_concept} +*/ +template > +struct TileLoadIterator : public TileIteratorBase { + /// Base class + typedef TileIteratorBase + Base; + + /// concept TileTraits + typedef typename Base::Traits Traits; + + /// Scalar element + typedef typename Base::Scalar Scalar; + + /// Fragment element + typedef typename Base::FragmentElement FragmentElement; + + /// Specifies in which dimension post-increment accesses advance. + static IteratorAdvance::Kind const kAdvance = Base::kAdvance; + + /// Specifies type of iterator fragment storage (Salar or WmmaMatrix) + static IteratorFragment::Kind const kIteratorFragment = Base::kIteratorFragment; + + /// Source or destination memory space + static MemorySpace::Kind const kMemorySpace = Base::kMemorySpace; + + /// Index type + typedef typename Base::Index Index; + + /// Skew quantity + typedef typename Base::Skew Skew; + + /// Tile shape + typedef typename Base::Tile Tile; + + /// Delta + typedef typename Base::Delta Delta; + + /// Iterations + typedef typename Base::Iterations Iterations; + + /// ThreadOffset functor + typedef typename Base::ThreadOffset ThreadOffset; + + /// Fragment type + typedef typename Base::FragmentShape FragmentShape; + + /// Memory access type + typedef typename Base::AccessType AccessType; + + /// Fragment definition + typedef typename Base::Fragment Fragment; + + /// Fragment iterator definition + typedef typename Base::FragmentIterator FragmentIterator; + + /// Fragment const iterator definition + typedef typename Base::FragmentConstIterator FragmentConstIterator; + + /// Default predicate mask type + typedef typename Base::PredicateVector PredicateVector; + + /// Storage object that may be loaded from + typedef typename Base::Storage SharedStorage; + + /// IteratorBase 
parameters + typedef typename Base::Params BaseParams; + + /// Do we require a fence? + enum { kRequiresLoadFence = Tile::kD == 1 }; + + /// The pointer type + typedef Scalar const *Pointer; + + /// Parameters + struct Params : public BaseParams { + /// Pointer to memory + Scalar const *pointer; + + /// Initialize params to access storage object + CUTLASS_HOST_DEVICE + int initialize(SharedStorage const &storage) { + pointer = &storage[0]; + return 0; + } + + /// Initializes params to access a raw pointer + CUTLASS_HOST_DEVICE + int initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w) { + Base::Params::initialize(stride_d, stride_h, stride_w); + pointer = ptr; + return 0; + } + + /// Initializes params + CUTLASS_HOST_DEVICE + int initialize(Scalar const *ptr, + Index _stride_d, + Index _stride_h, + Index _stride_w, + Index _inc_d, + Index _inc_h, + Index _inc_w, + Index _inc_advance) { + pointer = ptr; + Base::Params::initialize( + _stride_d, _stride_h, _stride_w, _inc_d, _inc_h, _inc_w, _inc_advance); + return 0; + } + + // Initializes params to default values + CUTLASS_HOST_DEVICE + int initialize() { return Base::Params::initialize(); } + }; + + // + // Data members + // + + /// Parameters structure + Params params; + + /// Offset of an individual lane from the start of the tile + Coord<4> thread_offset; + + /// Stage argument enables wrapping after some number of tiles have been loaded. 
+ int stage; + + // + // Static member functions + // + + /// Initializes a predicate vector + template + CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, + Coord<3> const &bounds, + Coord<3> const &block_offset = make_Coord(0, + 0, + 0)) { + Base::initialize_predicates( + predicate_it, + bounds, + block_offset + make_Coord(0, thread_offset[1], thread_offset[2] * Tile::kC)); + } + + // + // Methods + // + + /// Default constructor + CUTLASS_HOST_DEVICE + TileLoadIterator() {} + + /// Constructs a tile load iterator + CUTLASS_HOST_DEVICE + TileLoadIterator(Params const &_params, + Coord<3> const &block_offset = make_Coord(0, 0, 0), + ThreadOffset thread_offset_func = ThreadOffset()) + : params(_params), stage(0) { + thread_offset = thread_offset_func(); + + Index block_offset_h = 0; + Index block_offset_w = 0; + if (kAdvance == IteratorAdvance::kH) { + block_offset_h = block_offset[1]; + block_offset_w = block_offset[2]; + } else { + block_offset_h = block_offset[2]; + block_offset_w = block_offset[1]; + } + + params.pointer += block_offset[0] * params.stride_d + + (block_offset_h + thread_offset[1]) * params.stride_h + + (block_offset_w + thread_offset[2] * Tile::kC) / Tile::kC * params.stride_w; + } + + /// Constructs a tile load iterator + CUTLASS_HOST_DEVICE + TileLoadIterator(Params const &, + SharedStorage &shared_storage, + Coord<3> const &block_offset = make_Coord(0, 0, 0), + ThreadOffset thread_offset_func = ThreadOffset()) + : stage(0) { + int const offset = thread_offset_func()[2]; + params.pointer = &shared_storage[offset]; + } + + /// Returns the current pointer + CUTLASS_HOST_DEVICE + Scalar const *data() const { return params.pointer; } + + /// Increment in the D dimension + CUTLASS_HOST_DEVICE void inc_d() { params.pointer += params.inc_d; } + + /// Increment in the H dimension + CUTLASS_HOST_DEVICE void inc_h() { params.pointer += params.inc_h; } + + /// Increment in the W dimension + CUTLASS_HOST_DEVICE void inc_w() { 
params.pointer += params.inc_w; } + + /// Increment in the next dimension + CUTLASS_HOST_DEVICE void inc_advance() { params.pointer += params.inc_advance; } + + /// Increment the stage. + CUTLASS_DEVICE void inc_stage() { + if (Tile::kD > 1) { + int const kStageSize = Tile::kH * Tile::kW * Tile::kC; + if (stage == Tile::kD - 1) { + params.pointer -= (Tile::kD - 1) * kStageSize; + stage = 0; + } else { + params.pointer += kStageSize; + stage = stage + 1; + } + } + } + + public: + /// Loads a fragment and advances the iterator to the next tile. + template + CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment, PredicateIterator pred_it) { + FragmentIterator frag_iterator(fragment); + + for (int d = 0; d < Iterations::kD; ++d) { + for (int h = 0; h < Iterations::kH; ++h) { + for (int w = 0; w < Iterations::kW; ++w, ++pred_it) { + if (*pred_it) { + Load::load( + reinterpret_cast(frag_iterator.at(d, h, w, 0)), data(), 0); + } + + if (w < Iterations::kW - 1) { + inc_w(); + } + } + if (h < Iterations::kH - 1) { + inc_h(); + } + } + if (d < Iterations::kD - 1) { + inc_d(); + } + } + inc_advance(); + } + + /// Loads a fragment and advances the iterator to the next tile. + template + CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment) { + typename PredicateVector::TrivialIterator pred_it; + load_post_increment(fragment, pred_it); + } + + /// Loads a fragment without advancing the iterator.. + template + CUTLASS_HOST_DEVICE void load(Fragment &fragment, PredicateIterator pred_it) const { + TileLoadIterator _load_it(*this); + _load_it.load_post_increment(fragment, pred_it); + } + + /// Loads a fragment without advancing the iterator.. 
+ template + CUTLASS_HOST_DEVICE void load(Fragment &fragment) const { + typename PredicateVector::TrivialIterator pred_it; + load(fragment, pred_it); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup tile_store_iterator_concept Tile Store Iterator Concept +@{ + +\ref tile_store_iterator_concept enables storing a tile to addressable memory + +@par Tile Store Iterator Concept + Types satisfying \ref tile_load_iterator_concept define the following members + - PredicateVector - a \ref predicate_vector_concept with sufficient predicate storage for +each access implied by the tile traits + - Fragment - the destination fragment type satisfying \ref fragment_concept + - initialize_predicates(pred_it, bounds, block_offset) - function initializing a predicate +vector according to externally specified bounds + - store_post_increment(fragment, pred_it) - a method that stores a fragment and increments +the iterator to the next tile, guarded by a \ref predicate_iterator_concept + - store_post_increment(fragment) - a method that stores a fragment and increments the +iterator to the next tile + - store(fragment, pred_it) - a const method that stores a fragment, guarded by a \ref +predicate_iterator_concept + - store(fragment) - a method that loads a fragment + +@} +*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +* @brief An iterator implementing \ref tile_store_iterator_concept for storing a tile to memory +* @concept{tile_store_iterator_concept} +*/ +template > +struct TileStoreIterator : public TileIteratorBase { + /// Base class + typedef TileIteratorBase + Base; + + /// concept TileTraits + typedef typename Base::Traits Traits; + + /// Scalar element + typedef typename Base::Scalar Scalar; + + /// Fragment element + typedef typename Base::FragmentElement FragmentElement; + + /// Specifies in which dimension post-increment accesses 
advance. + static IteratorAdvance::Kind const kAdvance = Base::kAdvance; + + /// Specifies type of iterator fragment storage (Scalar or WmmaMatrix) + static IteratorFragment::Kind const kIteratorFragment = Base::kIteratorFragment; + + /// Source or destination memory space + static MemorySpace::Kind const kMemorySpace = Base::kMemorySpace; + + /// Index type + typedef typename Base::Index Index; + + /// Skew quantity + typedef typename Base::Skew Skew; + + /// Tile shape + typedef typename Base::Tile Tile; + + /// Delta + typedef typename Base::Delta Delta; + + /// Iterations + typedef typename Base::Iterations Iterations; + + /// ThreadOffset functor + typedef typename Base::ThreadOffset ThreadOffset; + + /// Fragment shape + typedef typename Base::FragmentShape FragmentShape; + + /// Memory access type + typedef typename Base::AccessType AccessType; + + /// Fragment definition + typedef typename Base::Fragment Fragment; + + /// Fragment iterator definition + typedef typename Base::FragmentIterator FragmentIterator; + + /// Fragment const iterator definition + typedef typename Base::FragmentConstIterator FragmentConstIterator; + + /// Default predicate mask type + typedef typename Base::PredicateVector PredicateVector; + + /// Storage object which may be stored to + typedef typename Base::Storage SharedStorage; + + /// IteratorBase parameters + typedef typename Base::Params BaseParams; + + /// Parameters + struct Params : public BaseParams { + /// Pointer to memory + Scalar *pointer; + + /// Initialize params to access storage object + CUTLASS_HOST_DEVICE + int initialize(SharedStorage &storage) { + pointer = &storage[0]; + return 0; + } + + /// Initializes params to access a raw pointer + CUTLASS_HOST_DEVICE + int initialize(Scalar *ptr, Index stride_d, Index stride_h, Index stride_w) { + Base::Params::initialize(stride_d, stride_h, stride_w); + pointer = ptr; + return 0; + } + + /// Initializes params + CUTLASS_HOST_DEVICE + int initialize(Scalar *ptr, + Index
_stride_d, + Index _stride_h, + Index _stride_w, + Index _inc_d, + Index _inc_h, + Index _inc_w, + Index _inc_advance) { + pointer = ptr; + Base::Params::initialize( + _stride_d, _stride_h, _stride_w, _inc_d, _inc_h, _inc_w, _inc_advance); + return 0; + } + + /// Initializes params to default values + CUTLASS_HOST_DEVICE + int initialize() { return Base::Params::initialize(); } + }; + + // + // Data members + // + + /// Parameters structure + Params params; + + /// Offset of an individual lane from the start of the tile + Coord<4> thread_offset; + + /// The stage. + int stage; + + // + // Static member functions + // + + /// Initializes a predicate vector + template + CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, + Coord<3> const &bounds, + Coord<3> const &block_offset = make_Coord(0, + 0, + 0)) { + Base::initialize_predicates( + predicate_it, + bounds, + block_offset + make_Coord(0, thread_offset[1], thread_offset[2] * Tile::kC)); + } + + // + // Methods + // + + /// Default constructor + CUTLASS_HOST_DEVICE + TileStoreIterator() {} + + /// Constructs a tile store iterator + CUTLASS_HOST_DEVICE + TileStoreIterator(Params const &_params, + Coord<3> const &block_offset = make_Coord(0, 0, 0), + ThreadOffset thread_offset_func = ThreadOffset()) + : params(_params), stage(0) { + thread_offset = thread_offset_func(); + + params.pointer += block_offset[0] * params.stride_d + + (block_offset[1] + thread_offset[1]) * params.stride_h + + (block_offset[2] + thread_offset[2] * Tile::kC) / Tile::kC * params.stride_w; + } + + /// Constructs a tile store iterator + CUTLASS_HOST_DEVICE + TileStoreIterator(Params const &, + SharedStorage &shared_storage, + Coord<3> const &block_offset = make_Coord(0, 0, 0), + ThreadOffset thread_offset_func = ThreadOffset()) + : stage(0) { + int const offset = thread_offset_func()[2]; + params.pointer = &shared_storage[offset]; + } + + /// Returns the current pointer + CUTLASS_HOST_DEVICE + Scalar *data() const { 
return params.pointer; } + + /// Increment in the D dimension + CUTLASS_HOST_DEVICE void inc_d() { params.pointer += params.inc_d; } + + /// Increment in the H dimension + CUTLASS_HOST_DEVICE void inc_h() { params.pointer += params.inc_h; } + + /// Increment in the W dimension + CUTLASS_HOST_DEVICE void inc_w() { params.pointer += params.inc_w; } + + /// Increment in the next dimension + CUTLASS_HOST_DEVICE void inc_advance() {} + + /// Increment the stage. + CUTLASS_DEVICE void inc_stage() { + if (Tile::kD > 1) { + int const kStageSize = Tile::kH * Tile::kW * Tile::kC; + if (stage == Tile::kD - 1) { + params.pointer -= (Tile::kD - 1) * kStageSize; + stage = 0; + } else { + params.pointer += kStageSize; + stage = stage + 1; + } + } + } + + public: + /// Stores a fragment and advances to the next tile. + template + CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment, PredicateIterator pred_it) { + FragmentIterator frag_iterator(fragment); + + for (int d = 0; d < Iterations::kD; ++d) { + for (int h = 0; h < Iterations::kH; ++h) { + for (int w = 0; w < Iterations::kW; ++w, ++pred_it) { + if (*pred_it) { + Store::store( + reinterpret_cast(frag_iterator.at(d, h, w, 0)), data(), 0); + } + if (w < Iterations::kW - 1) { + inc_w(); + } + } + if (h < Iterations::kH - 1) { + inc_h(); + } + } + if (d < Iterations::kD - 1) { + inc_d(); + } + } + inc_advance(); + } + + /// Stores a fragment and advances to the next tile. + template + CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment) { + typename PredicateVector::TrivialIterator pred_it; + store_post_increment(fragment, pred_it); + } + + /// Stores a fragment without advancing the iterator. + template + CUTLASS_HOST_DEVICE void store(Fragment &fragment, PredicateIterator pred_it) const { + TileStoreIterator _store_it(*this); + _store_it.store_post_increment(fragment, pred_it); + } + + /// Stores a fragment without advancing the iterator. 
+ template + CUTLASS_HOST_DEVICE void store(Fragment &fragment) const { + typename PredicateVector::TrivialIterator pred_it; + store(fragment, pred_it); + } +}; +} diff --git a/cutlass/tile_traits_standard.h b/cutlass/tile_traits_standard.h new file mode 100644 index 00000000..14ecd01a --- /dev/null +++ b/cutlass/tile_traits_standard.h @@ -0,0 +1,238 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines tile traits for several tile partitioning arrangements of threads expected to + achieve efficient streaming performance. +*/ +#pragma once + +#include + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Basic thread offset function computed from a thread shape +template +struct TiledThreadOffset { + /// Computes the logical coordinate from thread shape + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + Coord<4> thread_offset; + + int index = threadIdx.x; + + thread_offset[3] = (index % ThreadShape::kC); + index = (index / ThreadShape::kC); + + thread_offset[2] = (index % ThreadShape::kW); + index = (index / ThreadShape::kW); + + thread_offset[1] = (index % ThreadShape::kH); + index = (index / ThreadShape::kH); + + thread_offset[0] = index; + + return thread_offset; + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Tiling in which the number of threads is greater than the +/// contiguous dimension of the tile. 
+template +struct TileTraitsStrideMajor { + /// Shape of tile + typedef Tile_ Tile; + + /// Number of participating threads + static int const kThreads = Threads; + + // Static assertions + static_assert(!(ShapeCount::kDhw % kThreads), + "Tiling undefined if elements not divisible by threads."); + + static_assert(Tile::kW <= kThreads, + "This specialization assumes there are more threads than the contiguous dimension " + "of the tile."); + + /// Shape of threads + typedef Shape<1, kThreads / Tile::kW, Tile::kW, 1> ThreadShape; + + /// Delta along each dimension + typedef Shape<1, ThreadShape::kH, 1, 1> Delta; + + /// Number of iterations + typedef Shape<1, Tile::kH / ThreadShape::kH, 1, 1> Iterations; + + /// Computes the initial offset + typedef TiledThreadOffset ThreadOffset; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Tiling in which the number of threads is fewer than the tile size +/// in the contiguous dimension. 
+template +struct TileTraitsContiguousMajor { + /// Shape of tile + typedef Tile_ Tile; + + /// Number of participating threads + static int const kThreads = Threads; + + // Static assertions + static_assert(Tile::kW >= kThreads, + "This specialization assumes there are more threads than the contiguous dimension " + "of the tile."); + + static_assert(!(ShapeCount::kDhw % kThreads), + "Tiling undefined if elements not divisible by threads."); + + static_assert(!(Tile::kW % kThreads), + "The contiguous size of the tile must be divisible by the number of threads."); + + /// Thread shape + typedef Shape<1, 1, kThreads> ThreadShape; + + /// Delta between each thread's access + typedef Shape<1, 1, kThreads> Delta; + + /// Number of iterations + typedef Shape<1, Tile::kH, Tile::kW / kThreads> Iterations; + + /// Computes the initial offset + typedef TiledThreadOffset ThreadOffset; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Tiling in which warps rake across the contiguous dimension +template +struct TileTraitsWarpRake { + /// Shape of tile + typedef Tile_ Tile; + + /// Number of participating threads + static int const kThreads = Threads; + + /// Hard-coded warp size + static int const kWarpSize = 32; + + /// Number of participating warps + static int const kWarpCount = kThreads / kWarpSize; + + // Static assertions + static_assert(!(ShapeCount::kDhw % kThreads), + "Tiling undefined if elements not divisible by threads."); + + static_assert(!(kThreads % kWarpSize), "Number of threads must be divisible by the warp size."); + + static_assert(!(Tile::kW % kWarpSize), "Contiguous dimension must be divisible by the warp size"); + + /// Warps strip-mined across strided dimension + static int const kWarpsStrided = __NV_STD_MIN(kWarpCount, Tile::kH); + + /// Warps stripmined contiguous dimension + static int const kWarpsContiguous = kWarpCount / kWarpsStrided; + + /// Arrangement of threads + typedef Shape<1, 
kWarpsStrided, kWarpsContiguous * kWarpSize> ThreadShape; + + /// The same warp rakes along the contiguous dimension + typedef Shape<1, kWarpsStrided, kWarpSize> Delta; + + /// Number of iterations + typedef Shape<1, Tile::kH / Delta::kH, Tile::kW / ThreadShape::kW> Iterations; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + /// Basic thread offset function computed from a thread shape + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int tid = threadIdx.x; + int warp = (tid / kWarpSize); + int lane = (tid % kWarpSize); + + static int const kWarpSpanContiguous = kWarpSize * Iterations::kW; + + int warp_w = (warp % kWarpsContiguous); + int warp_h = (warp / kWarpsContiguous); + + return make_Coord(0, warp_h, lane + kWarpSpanContiguous * warp_w, 0); + } + }; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Chooses 'best' shape to enable warp raking along contiguous dimension if possible. +template +struct TileTraitsStandard { + /// Shape of tile + typedef Tile_ Tile; + + /// Number of participating threads + static int const kThreads = Threads; + + /// Hard-coded warp size + static int const kWarpSize = 32; + + /// Number of participating warps + static int const kWarpCount = kThreads / kWarpSize; + + // Static assertions + static_assert(!(ShapeCount::kDhw % kThreads), + "Tiling undefined if elements not divisible by threads."); + + /// Choose the stride-major contiguous tiling if the contiguous dimension is + /// smaller than the warp size. Otherwise, if it is divisible by the warp size, + /// choose the warp rake arrangement. 
+ typedef typename platform::conditional < + Tile::kW, + typename platform::conditional, + TileTraitsContiguousMajor >::type>:: + type Traits; + + /// Delta between accesses + typedef typename Traits::Delta Delta; + + /// Delta between each thread's access + /// TODO MTA this is wrong for sure, but Delta is used for stride computation at the moment + typedef Delta ImmediateOffsetStrides; + + /// Number of accesses + typedef typename Traits::Iterations Iterations; + + /// Thread offset functor + typedef typename Traits::ThreadOffset ThreadOffset; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/util/cutlass_math.h b/cutlass/util/cutlass_math.h new file mode 100644 index 00000000..0ecdc438 --- /dev/null +++ b/cutlass/util/cutlass_math.h @@ -0,0 +1,131 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#pragma once + +/** + * \file + * \brief Math utilities + */ + +#include + +namespace cutlass { + +/****************************************************************************** + * Static math utilities + ******************************************************************************/ + +/** + * Statically determine if N is a power-of-two + */ +template +struct is_pow2 : platform::integral_constant {}; + +/** + * Statically determine log2(N), rounded down + */ +template +struct log2_down { + /// Static logarithm value + enum { value = log2_down> 1), Count + 1>::value }; +}; + +// Base case +template +struct log2_down { + enum { value = Count }; +}; + +/** + * Statically determine log2(N), rounded up + */ +template +struct log2_up { + /// Static logarithm value + enum { value = log2_up> 1), Count + 1>::value }; +}; + +// Base case +template +struct log2_up { + enum { value = ((1 << Count) < N) ? 
Count + 1 : Count }; +}; + +/** + * Statically estimate sqrt(N) to the nearest power-of-two + */ +template +struct sqrt_est { + enum { value = 1 << (log2_up::value / 2) }; +}; + +/** + * For performing a constant-division with a compile-time assertion that the + * Divisor evenly-divides the Dividend. + */ +template +struct divide_assert { + enum { value = Dividend / Divisor }; + + static_assert((Dividend % Divisor == 0), "Not an even multiple"); +}; + +/****************************************************************************** + * Rounding + ******************************************************************************/ + +/** + * Round dividend up to the nearest multiple of divisor + */ +template +CUTLASS_HOST_DEVICE dividend_t round_nearest(dividend_t dividend, divisor_t divisor) { + return ((dividend + divisor - 1) / divisor) * divisor; +} + +/** + * Greatest common divisor + */ +template +CUTLASS_HOST_DEVICE value_t gcd(value_t a, value_t b) { + for (;;) { + if (a == 0) return b; + b %= a; + if (b == 0) return a; + a %= b; + } +} + +/** + * Least common multiple + */ +template +CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b) { + value_t temp = gcd(a, b); + + return temp ? (a / temp * b) : 0; +} + +} // namespace cutlass diff --git a/cutlass/util/debug.h b/cutlass/util/debug.h index 2aedd17a..6055e3fc 100644 --- a/cutlass/util/debug.h +++ b/cutlass/util/debug.h @@ -1,29 +1,27 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - ******************************************************************************/ + **************************************************************************************************/ #pragma once @@ -44,87 +42,81 @@ namespace cutlass { * Formats and prints the given message to stdout */ #if !defined(CUDA_LOG) - #if !defined(__CUDA_ARCH__) - #define CUDA_LOG(format, ...) 
printf(format, __VA_ARGS__) - #else -inline __host__ __device__ unsigned get_threadidx_x() { return threadIdx.x; } -inline __host__ __device__ unsigned get_threadidx_y() { return threadIdx.y; } -inline __host__ __device__ unsigned get_threadidx_z() { return threadIdx.z; } -inline __host__ __device__ unsigned get_blockidx_x() { return blockIdx.x; } -inline __host__ __device__ unsigned get_blockidx_y() { return blockIdx.y; } -inline __host__ __device__ unsigned get_blockidx_z() { return blockIdx.z; } - #define CUDA_LOG(format, ...) \ - printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, \ - get_blockidx_x(), get_blockidx_y(), get_blockidx_z(), \ - get_threadidx_x(), get_threadidx_y(), get_threadidx_z(), \ - __VA_ARGS__); - #endif +#if !defined(__CUDA_ARCH__) +#define CUDA_LOG(format, ...) printf(format, __VA_ARGS__) +#else +#define CUDA_LOG(format, ...) \ + printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, \ + blockIdx.x, \ + blockIdx.y, \ + blockIdx.z, \ + threadIdx.x, \ + threadIdx.y, \ + threadIdx.z, \ + __VA_ARGS__); +#endif #endif - /** * Formats and prints the given message to stdout only if DEBUG is defined */ #if !defined(CUDA_LOG_DEBUG) - #ifdef DEBUG - #define CUDA_LOG_DEBUG(format, ...) CUDA_LOG(format, __VA_ARGS__) - #else - #define CUDA_LOG_DEBUG(format, ...) - #endif +#ifdef DEBUG +#define CUDA_LOG_DEBUG(format, ...) CUDA_LOG(format, __VA_ARGS__) +#else +#define CUDA_LOG_DEBUG(format, ...) +#endif #endif - /** - * \brief The corresponding error message is printed to \p stderr (or \p stdout in device code) along with the supplied source context. + * \brief The corresponding error message is printed to \p stderr (or \p stdout in device code) + * along with the supplied source context. * * \return The CUDA error. 
*/ -__host__ __device__ inline cudaError_t cuda_perror_impl( - cudaError_t error, - const char* filename, - int line) -{ - (void)filename; - (void)line; - if (error) - { +__host__ CUTLASS_DEVICE cudaError_t cuda_perror_impl(cudaError_t error, + const char* filename, + int line) { + (void)filename; + (void)line; + if (error) { #if !defined(__CUDA_ARCH__) - fprintf(stderr, "CUDA error %d [%s, %d]: %s\n", error, filename, line, cudaGetErrorString(error)); - fflush(stderr); + fprintf( + stderr, "CUDA error %d [%s, %d]: %s\n", error, filename, line, cudaGetErrorString(error)); + fflush(stderr); #else - printf("CUDA error %d [%s, %d]\n", error, filename, line); + printf("CUDA error %d [%s, %d]\n", error, filename, line); #endif - } - return error; + } + return error; } - /** * \brief Perror macro */ #ifndef CUDA_PERROR - #define CUDA_PERROR(e) cuda_perror_impl((cudaError_t) (e), __FILE__, __LINE__) +#define CUDA_PERROR(e) cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__) #endif - /** * \brief Perror macro with exit */ #ifndef CUDA_PERROR_EXIT - #define CUDA_PERROR_EXIT(e) if (cuda_perror_impl((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); } +#define CUDA_PERROR_EXIT(e) \ + if (cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)) { \ + exit(1); \ + } #endif - /** * \brief Perror macro only if DEBUG is defined */ #ifndef CUDA_PERROR_DEBUG - #ifdef DEBUG - #define CUDA_PERROR_DEBUG(e) CUDA_PERROR(e) - #else - #define CUDA_PERROR_DEBUG(e) (e) - #endif +#ifdef DEBUG +#define CUDA_PERROR_DEBUG(e) CUDA_PERROR(e) +#else +#define CUDA_PERROR_DEBUG(e) (e) +#endif #endif - -} // namespace cutlass +} // namespace cutlass diff --git a/cutlass/util/device_introspection.h b/cutlass/util/device_introspection.h deleted file mode 100644 index b4946e0c..00000000 --- a/cutlass/util/device_introspection.h +++ /dev/null @@ -1,224 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Utilities for device introspection - */ - -#include "debug.h" -#include "nv_std.h" -#include "printable.h" - -namespace cutlass { - - -/****************************************************************************** - * math_operation_class_t - * - * Enumeration to select the appropriate math operation - * - * The assumption is multiple math operations may be used to compute GEMM - * for a given selection of operand and accumulator types. - * - ******************************************************************************/ - -/// Math operation -enum class math_operation_class_t -{ - scalar, // scalar (and vector) multiply-accumulate operations - matrix // Volta tensor operations -}; - -/****************************************************************************** - * arch_family_t - ******************************************************************************/ - -/** - * \brief Enumeration of NVIDIA GPU architectural families - */ -struct arch_family_t -{ - /// \brief Enumerants - enum kind_t - { - Unsupported = 0, - Kepler = 3, - Maxwell = 5, - Volta = 7, - }; - - /// Enumerant value - kind_t kind; - - /// Default constructor - arch_family_t() : kind(Unsupported) {} - - /// Copy constructor - arch_family_t(const kind_t &other_kind) : kind(other_kind) {} - - /// Cast to kind_t - operator kind_t() const { return kind; } - - /// Returns the instance as a string - __host__ __device__ inline - char const* to_string() const - { - switch (kind) - { - case Kepler: return "Kepler"; - case Maxwell: return "Maxwell"; - case Volta: return "Volta"; - case Unsupported: - default: return "Unsupported"; - } - } - - /// Insert the formatted instance into the output stream - void print(std::ostream& out) const { out << to_string(); } - -}; - - -/** - * Macro for architecture targeted by the current compiler pass - */ -#if defined(__CUDA_ARCH__) - 
#define CUTLASS_ARCH __CUDA_ARCH__ -#else - #define CUTLASS_ARCH 0 -#endif - - -/** - * Macro for architecture family targeted by the current compiler pass - */ -#define CUTLASS_ARCH_FAMILY \ - ( \ - (CUTLASS_ARCH < 300) ? \ - arch_family_t::Unsupported : \ - (CUTLASS_ARCH < 500) ? \ - arch_family_t::Kepler : \ - (CUTLASS_ARCH < 700) ? \ - arch_family_t::Maxwell : \ - arch_family_t::Volta \ - ) - - - - -/****************************************************************************** - * Device introspection - ******************************************************************************/ - -/** - * Empty kernel for querying PTX manifest metadata (e.g., version) for the current device - */ -template -__global__ void empty_kernel(void) { } - - - -/** - * \brief Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) - */ -cudaError_t ptx_version(int &version) -{ - struct Dummy - { - /// Type definition of the empty_kernel kernel entry point - typedef void (*EmptyKernelPtr)(); - - /// Force empty_kernel to be generated if this class is used - EmptyKernelPtr Empty() - { - return empty_kernel; - } - }; - - cudaError_t error = cudaSuccess; - do - { - cudaFuncAttributes empty_kernel_attrs; - if (CUDA_PERROR_DEBUG(error = cudaFuncGetAttributes(&empty_kernel_attrs, empty_kernel))) break; - version = empty_kernel_attrs.ptxVersion * 10; - } - while (0); - - return error; -} - - -/** - * \brief Retrieves the SM version (major * 100 + minor * 10) for the current device - */ -cudaError_t get_sm_version(int &sm_version) -{ - cudaError_t error = cudaSuccess; - - // Get device ordinal - int device_ordinal; - if (CUDA_PERROR_DEBUG(error = cudaGetDevice(&device_ordinal))) - return error; - - // Fill in SM version - int major, minor; - if (CUDA_PERROR_DEBUG(error = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device_ordinal))) - return error; - if (CUDA_PERROR_DEBUG(error = cudaDeviceGetAttribute(&minor, 
cudaDevAttrComputeCapabilityMinor, device_ordinal))) - return error; - sm_version = major * 100 + minor * 10; - - return error; -} - - -/** - * \brief Retrieves the count for the current device - */ -cudaError_t get_sm_count(int &sm_count) -{ - cudaError_t error = cudaSuccess; - - // Get device ordinal - int device_ordinal; - if (CUDA_PERROR_DEBUG(error = cudaGetDevice(&device_ordinal))) - return error; - - // Get SM count - if (CUDA_PERROR_DEBUG(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) - return error; - - return error; -} - - -} // namespace cutlass - - diff --git a/cutlass/util/io_intrinsics.h b/cutlass/util/io_intrinsics.h deleted file mode 100644 index dca92da6..00000000 --- a/cutlass/util/io_intrinsics.h +++ /dev/null @@ -1,492 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief I/O device intrinsics - */ - -#include -#include - -#include "nv_std.h" -#include "math.h" - -namespace cutlass { - - - - -/****************************************************************************** - * io_vector - ******************************************************************************/ - -/** - * Base aligned storage for IO vector - */ -template struct io_vector_base; -template struct __align__(1) io_vector_base { value_t buff[VectorItems]; }; -template struct __align__(2) io_vector_base { value_t buff[VectorItems]; }; -template struct __align__(4) io_vector_base { value_t buff[VectorItems]; }; -template struct __align__(8) io_vector_base { value_t buff[VectorItems]; }; -template struct __align__(16) io_vector_base { value_t buff[VectorItems]; }; - - -/** - * \brief Aligned vector type for coarsening data movement instructions - * - * Exposes the member constant \p VectorItems, the actual number of component - * values comprising the io_vector - */ -template < - typename value_t, ///< Component value type - int MaxVectorItems, ///< Maximum allowable component values - int MaxAlignBytes ///< Maximum allowable alignment - = __NV_STD_MIN(16, MaxVectorItems * sizeof(value_t)), - int AlignBytes ///< Actual alignment - = __NV_STD_MIN(sizeof(value_t) * MaxVectorItems, MaxAlignBytes), - int VectorItems ///< Actual number 
of component values - = divide_assert::value, - bool MustAlias ///< Whether we need to alias during loads/stores - = (VectorItems > 4)> -struct io_vector; - - -/** - * IO vector (specialization for VectorItems <= 4) - */ -template < - typename value_t, - int MaxVectorItems, - int MaxAlignBytes, - int _AlignBytes, - int _VectorItems> -struct io_vector < - value_t, - MaxVectorItems, - MaxAlignBytes, - _AlignBytes, - _VectorItems, - false> -: - io_vector_base -{ - enum - { - VectorItems = _VectorItems, - AlignBytes = _AlignBytes - }; - - static_assert(is_pow2::value, "I/O vector alignment must be a power-of-two."); - static_assert((AlignBytes <= 16), "I/O vector alignment must <= 16B."); - - inline __device__ - void load(const io_vector *ptr) - { - *this = *ptr; - } - - inline __device__ - void load(const value_t *ptr) - { - *this = *reinterpret_cast(ptr); - } - - - inline __device__ - void store(io_vector *ptr) const - { - *ptr = *this; - } - - inline __device__ - void store(value_t *ptr) const - { - *reinterpret_cast(ptr) = *this; - } -}; - - -/** - * IO vector (specialization for VectorItems > 4) - * - * NB: Workaround for NVCC not generating 128-bit loads/stores for aligned - * structures having component types < 32b - */ -template < - typename value_t, - int MaxVectorItems, - int MaxAlignBytes, - int _AlignBytes, - int _VectorItems> -struct io_vector < - value_t, - MaxVectorItems, - MaxAlignBytes, - _AlignBytes, - _VectorItems, - true> -: - io_vector_base -{ - enum - { - VectorItems = _VectorItems, - AlignBytes = _AlignBytes - }; - - static_assert(is_pow2::value, "I/O vector alignment must be a power-of-two."); - static_assert((AlignBytes <= 16), "I/O vector alignment must <= 16B."); - - typedef typename nv_std::conditional<(AlignBytes == 8), - uint2, // Use 8B load - uint4> // Use 16B load - ::type align_t; - - inline __device__ - void load(const io_vector *ptr) - { - *reinterpret_cast(this) = *reinterpret_cast(ptr); - } - - inline __device__ - void load(const 
value_t *ptr) - { - *reinterpret_cast(this) = *reinterpret_cast(ptr); - } - - - inline __device__ - void store(io_vector *ptr) const - { - *reinterpret_cast(ptr) = *reinterpret_cast(this); - } - - inline __device__ - void store(value_t *ptr) const - { - *reinterpret_cast(ptr) = *reinterpret_cast(this); - } - -}; - - - - - - - -/****************************************************************************** - * Macro expansions for vector loads - ******************************************************************************/ - -/** - * Define vector-4 LD specialization for the given load modifier - */ -#define CUTLASS_LD_V4(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - value_t (&dest)[4], \ - ptr_t ptr) \ - { \ - asm volatile ("ld."#load_modifier".v4."#ptx_type" {%0, %1, %2, %3}, [%4];\n" \ - : \ - "="#val_constraint(dest[0]), \ - "="#val_constraint(dest[1]), \ - "="#val_constraint(dest[2]), \ - "="#val_constraint(dest[3]) \ - : \ - #ptr_constraint(ptr)); \ - } - -/** - * Define vector-2 LD specialization for the given load modifier - */ -#define CUTLASS_LD_V2(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - value_t (&dest)[2], \ - ptr_t ptr) \ - { \ - asm volatile ("ld."#load_modifier".v2."#ptx_type" {%0, %1}, [%2];\n" \ - : \ - "="#val_constraint(dest[0]), \ - "="#val_constraint(dest[1]) \ - : \ - #ptr_constraint(ptr)); \ - } - - -/** - * Define vector-1 LD specialization for the given load modifier - */ -#define CUTLASS_LD_V1(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - value_t (&dest)[1], \ - ptr_t ptr) \ - { \ - asm volatile ("ld."#load_modifier"."#ptx_type" %0, [%1];\n" \ - : \ - "="#val_constraint(dest[0]) \ - : \ - #ptr_constraint(ptr)); \ - } - - -/** - * Define powers-of-two vector LD specializations - */ 
-#define CUTLASS_LD_ALL(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_LD_V4(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_LD_V2(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_LD_V1(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) - - -/****************************************************************************** - * Macro expansions for vector stores - ******************************************************************************/ - -/** - * Define vector-4 ST specialization for the given load modifier - */ -#define CUTLASS_ST_V4(f_name, value_t, store_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - ptr_t ptr, \ - const value_t (&src)[4]) \ - { \ - asm volatile ("st."#store_modifier".v4."#ptx_type" [%0], {%1, %2, %3, %4};\n" \ - : : \ - #ptr_constraint(ptr), \ - #val_constraint(src[0]), \ - #val_constraint(src[1]), \ - #val_constraint(src[2]), \ - #val_constraint(src[3])); \ - } - - -/** - * Define vector-2 ST specialization for the given load modifier - */ -#define CUTLASS_ST_V2(f_name, value_t, store_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - ptr_t ptr, \ - const value_t (&src)[2]) \ - { \ - asm volatile ("st."#store_modifier".v2."#ptx_type" [%0], {%1, %2};\n" \ - : : \ - #ptr_constraint(ptr), \ - #val_constraint(src[0]), \ - #val_constraint(src[1])); \ - } - -/** - * Define vector-1 ST specialization for the given load modifier - */ -#define CUTLASS_ST_V1(f_name, value_t, store_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - ptr_t ptr, \ - const value_t (&src)[1]) \ - { \ - asm volatile ("st."#store_modifier"."#ptx_type" [%0], %1;\n" \ - : : \ - #ptr_constraint(ptr), \ - #val_constraint(src[0])); \ - } - - -/** - * Define powers-of-two 
vector LD specializations - */ -#define CUTLASS_ST_ALL(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_ST_V4(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_ST_V2(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_ST_V1(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) - - - -/****************************************************************************** - * Macro expansions for vector IO - ******************************************************************************/ - -/** - * Define global and shared LD specializations - */ -#define CUTLASS_IO(value_t, ptx_type, val_constraint) \ - CUTLASS_LD_ALL(ldg_cg_internal, value_t, global.cg, ptx_type, val_constraint, l) \ - CUTLASS_ST_ALL(stg_cg_internal, value_t, global.cg, ptx_type, val_constraint, l) - - -// Define IO for useful types -CUTLASS_IO(double, f64, d) -CUTLASS_IO(float, f32, f) -CUTLASS_IO(int64_t, b64, l) -CUTLASS_IO(int32_t, b32, r) -CUTLASS_IO(int16_t, b16, h) - - -// Macro cleanup -#undef CUTLASS_IO -#undef CUTLASS_LD_ALL -#undef CUTLASS_LD_V4 -#undef CUTLASS_LD_V2 -#undef CUTLASS_LD_V1 -#undef CUTLASS_ST_ALL -#undef CUTLASS_ST_V4 -#undef CUTLASS_ST_V2 -#undef CUTLASS_ST_V1 - - -/****************************************************************************** - * I/O cast types - ******************************************************************************/ - -/// Provides the type for which to reinterpret-cast a given vector -template < - typename value_t, - int IoVecDim, - int ValueBytes = sizeof(value_t)> -struct io_cast -{ - typedef value_t type[IoVecDim]; -}; - - -/// Provides the type for which to reinterpret-cast a vector of 1B types -template < - typename value_t, - int IoVecDim> -struct io_cast -{ - typedef typename nv_std::conditional< - (IoVecDim < 2), - int8_t[1], // Use 8b load - typename nv_std::conditional< - (IoVecDim < 4), - int16_t[1], // Use 16b load - 
int32_t[IoVecDim / 4]>::type>::type // Use up to 128b load - type; -}; - - -/// Provides the type for which to reinterpret-cast a vector of 2B types -template < - typename value_t, - int IoVecDim> -struct io_cast -{ - typedef typename nv_std::conditional< - (IoVecDim < 2), - int16_t[1], // Use 16b load - int32_t[IoVecDim / 2]>::type // Use up to 128b load - type; -}; - - - -/****************************************************************************** - * ldg_cg intrinsics - ******************************************************************************/ - -/// Load from global (cache-global modifier) -template -inline __device__ -void ldg_cg( - value_t &dest, - ptr_t d_in) -{ - // Cast dest to a different array type if necessary - ldg_cg_internal( - reinterpret_cast::type &>(dest), - d_in); -} - -/// Load from global (cache-global modifier) -template -inline __device__ -void ldg_cg( - value_t (&dest)[IoVecDim], - ptr_t d_in) -{ - static_assert(is_pow2::value, "I/O vectors must be a power-of-two."); - - // Cast dest to a different array type if necessary - ldg_cg_internal( - reinterpret_cast::type &>(dest), - d_in); -} - - -/****************************************************************************** - * stg_cg intrinsics - ******************************************************************************/ - -/// Store to global (cache-global modifier) -template -inline __device__ -void stg_cg( - ptr_t dest, - const value_t &src) -{ - // Cast src to a different array type if necessary - stg_cg_internal( - dest, - reinterpret_cast::type &>(src)); -} - -/// Store to global (cache-global modifier) -template -inline __device__ -void stg_cg( - ptr_t dest, - const value_t (&src)[IoVecDim]) -{ - static_assert(is_pow2::value, "I/O vectors must be a power-of-two."); - - // Cast src to a different array type if necessary - stg_cg_internal( - dest, - reinterpret_cast::type &>(src)); -} - - - - - -} // namespace cutlass - diff --git a/cutlass/util/math.h b/cutlass/util/math.h 
deleted file mode 100644 index bddad671..00000000 --- a/cutlass/util/math.h +++ /dev/null @@ -1,167 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Math utilities - */ - -#include "nv_std.h" - -namespace cutlass { - - -/****************************************************************************** - * Static math utilities - ******************************************************************************/ - -/** - * Statically determine if N is a power-of-two - */ -template -struct is_pow2 : nv_std::integral_constant -{}; - - - - - -/** - * Statically determine log2(N), rounded down - */ -template -struct log2_down -{ - /// Static logarithm value - enum { value = log2_down> 1), Count + 1>::value }; -}; - -// Base case -template -struct log2_down -{ - enum { value = Count }; -}; - - - - -/** - * Statically determine log2(N), rounded up - */ -template -struct log2_up -{ - /// Static logarithm value - enum { value = log2_up> 1), Count + 1>::value }; -}; - -// Base case -template -struct log2_up -{ - enum { value = ((1 << Count) < N) ? Count + 1 : Count }; -}; - - - -/** - * Statically estimate sqrt(N) to the nearest power-of-two - */ -template -struct sqrt_est -{ - enum { value = 1 << (log2_up::value / 2) }; -}; - - - -/** - * For performing a constant-division with a compile-time assertion that the - * Divisor evenly-divides the Dividend. 
- */ -template -struct divide_assert -{ - enum { value = Dividend / Divisor}; - - static_assert((Dividend % Divisor == 0), "Not an even multiple"); -}; - - - - - -/****************************************************************************** - * Rounding - ******************************************************************************/ - -/** - * Round dividend up to the nearest multiple of divisor - */ -template -inline __host__ __device__ -dividend_t round_nearest(dividend_t dividend, divisor_t divisor) -{ - return ((dividend + divisor - 1) / divisor) * divisor; -} - - -/** - * Greatest common divisor - */ -template -inline __host__ __device__ -value_t gcd(value_t a, value_t b) -{ - for (;;) - { - if (a == 0) return b; - b %= a; - if (b == 0) return a; - a %= b; - } -} - - -/** - * Least common multiple - */ -template -inline __host__ __device__ -value_t lcm(value_t a, value_t b) -{ - value_t temp = gcd(a, b); - - return temp ? (a / temp * b) : 0; -} - - -} // namespace cutlass - diff --git a/cutlass/util/matrix_transform.h b/cutlass/util/matrix_transform.h deleted file mode 100644 index f3341e92..00000000 --- a/cutlass/util/matrix_transform.h +++ /dev/null @@ -1,102 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Enumeration of dense matrix view transformations - */ - -#include "printable.h" - -namespace cutlass { - - -/****************************************************************************** - * matrix_transform_t - ******************************************************************************/ - -/** - * \brief Enumeration of dense matrix view transformations - * - * These enumerators (and corresponding tag types) describe which view - * transformation needs to be applied prior to operation upon a given dense - * matrix. 
Its values correspond to Fortran characters 'n' (non-transpose), - * 't'(transpose) and 'c'(conjugate transpose) that are often - * used as parameters to legacy BLAS implementations - */ -struct matrix_transform_t : printable_t -{ - /// \brief Enumerants (same as CUBLAS) - enum kind_t - { - /// Invalid view - Invalid = -1, - - /// Non-transpose view - NonTranspose = 0, - - /// Transpose view - Transpose = 1, - - /// Conjugate transpose view - ConjugateTranpose = 2, - }; - - /// Enumerant value - kind_t kind; - - /// Default constructor - matrix_transform_t() : kind(Invalid) {} - - /// Copy constructor - matrix_transform_t(const kind_t &other_kind) : kind(other_kind) {} - - /// Cast to kind_t - operator kind_t() const { return kind; } - - /// Returns the instance as a string - __host__ __device__ inline - char const* to_string() const - { - switch (kind) - { - case NonTranspose: return "NonTranspose"; - case Transpose: return "Transpose"; - case ConjugateTranpose: return "ConjugateTranpose"; - default: return "Invalid"; - } - } - - /// Insert the formatted instance into the output stream - void print(std::ostream& out) const { out << to_string(); } - -}; - - -} // namespace cutlass diff --git a/cutlass/util/nv_std.h b/cutlass/util/nv_std.h deleted file mode 100644 index 819df3a0..00000000 --- a/cutlass/util/nv_std.h +++ /dev/null @@ -1,705 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief C++ features that may be otherwise unimplemented for CUDA device functions. - * - * This file has three components: - * - * (1) Macros: - * - Empty macro defines for C++ keywords not supported by the current - * version of C++. These simply allow compilation to proceed (but do - * not provide the added semantics). - * - \p noexcept - * - \p constexpr - * - \p nullptr - * - \p static_assert - * - * - Macro functions that we need in constant expressions because the - * C++ equivalents require constexpr compiler support. 
These are - * prefixed with \p __NV_STD_* - * - \p __NV_STD_MAX - * - \p __NV_STD_MIN - * - * (2) Re-implementations of STL functions and types: - * - C++ features that need the \p __device__ annotation. These are - * placed into the \p nv_std namespace. - * - \p plus - * - \p less - * - \p greater - * - \p min - * - \p max - * - \p methods on std::pair (==, !=, <, <=, >, >=, and make_pair()) - * - * (3) Stop-gap implementations of unsupported STL functions and types: - * - STL functions and types defined by C++ 11/14/17/etc. that are not - * provided by the current version of C++. These are placed into the - * \p nv_std namespace - * - \p integral_constant - * - \p nullptr_t - * - \p true_type - * - \p false_type - * - \p bool_constant - * - \p enable_if - * - \p conditional - * - \p is_same - * - \p is_base_of - * - \p remove_const - * - \p remove_volatile - * - \p remove_cv - * - \p is_volatile - * - \p is_pointer - * - \p is_void - * - \p is_integral - * - \p is_floating_point - * - \p is_arithmetic - * - \p is_fundamental - * - \p is_trivially_copyable - * - \p alignment_of - * - \p aligned_storage - * - * (4) Functions and types that are STL-like (but aren't in the STL): - * - \p TODO: min and max functors? - * - * The idea is that, as we drop support for older compilers, we can simply #define - * the \p __NV_STD_XYZ macros and \p nv_std namespace to alias their C++ - * counterparts (or trivially find-and-replace their occurrences in code text). 
- */ - - -//----------------------------------------------------------------------------- -// Include STL files that nv_std provides functionality for -//----------------------------------------------------------------------------- - -#include // nullptr_t -#include // Minimum/maximum operations -#include // Arithmetic operations -#include // For methods on std::pair -#if (!defined(_MSC_VER) && (__cplusplus >= 201103L)) || (defined(_MSC_VER) && (_MS_VER >= 1500)) - #include // For integral constants, conditional metaprogramming, and type traits -#endif - - - -/****************************************************************************** - * Macros - ******************************************************************************/ -//----------------------------------------------------------------------------- -// Keywords -//----------------------------------------------------------------------------- - -/// noexcept, constexpr -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1900)) - #ifndef noexcept - #define noexcept - #endif - #ifndef constexpr - #define constexpr - #endif -#endif - -/// nullptr -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1310 )) - #ifndef nullptr - #define nullptr 0 - #endif -#endif - -/// static_assert -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600 )) - #ifndef static_assert - #define __nv_std_cat_(a, b) a ## b - #define __nv_std_cat(a, b) __nv_std_cat_(a, b) - #define static_assert(__e, __m) typedef int __nv_std_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1] - #endif -#endif - - -//----------------------------------------------------------------------------- -// Functions -//----------------------------------------------------------------------------- - -/// Select maximum(a, b) -#ifndef __NV_STD_MAX - #define __NV_STD_MAX(a, b) (((b) > (a)) ? 
(b) : (a)) -#endif - -/// Select minimum(a, b) -#ifndef __NV_STD_MIN - #define __NV_STD_MIN(a, b) (((b) < (a)) ? (b) : (a)) -#endif - - - - -/****************************************************************************** - * Re-implementations - ******************************************************************************/ - -namespace nv_std { - - //----------------------------------------------------------------------------- - // Arithmetic operations, comparisons - //----------------------------------------------------------------------------- - - /// nv_std::plus - template - struct plus - { - inline __host__ __device__ - constexpr T operator()(const T &lhs, const T &rhs) const - { - return lhs + rhs; - } - }; - - - /// std::less - template - struct less - { - inline __host__ __device__ - constexpr bool operator()(const T &lhs, const T &rhs) const - { - return lhs < rhs; - } - }; - - /// std::greater - template - struct greater - { - inline __host__ __device__ - constexpr bool operator()(const T &lhs, const T &rhs) const - { - return lhs > rhs; - } - }; - - - //----------------------------------------------------------------------------- - // Minimum/maximum operations - //----------------------------------------------------------------------------- - - /// std::min - template - inline __host__ __device__ - constexpr const T& min( - const T& a, - const T& b) - { - return (b < a) ? b : a; - } - - /// std::max - template - inline __host__ __device__ - constexpr const T& max( - const T& a, - const T& b) - { - return (a < b) ? 
b : a; - } - - - //----------------------------------------------------------------------------- - // Methods on std::pair - //----------------------------------------------------------------------------- - - using std::pair; - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator==( const pair& lhs, const pair& rhs ) - { - return (lhs.first == rhs.first) && (lhs.second == rhs.second); - } - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator!=( const pair& lhs, const pair& rhs ) - { - return (lhs.first != rhs.first) && (lhs.second != rhs.second); - } - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator<( const pair& lhs, const pair& rhs ) - { - return (lhs.first < rhs.first) ? - true : - (rhs.first < lhs.first) ? - false : - (lhs.second < rhs.second); - } - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator<=( const pair& lhs, const pair& rhs ) - { - return !(rhs < lhs); - } - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator>( const pair& lhs, const pair& rhs ) - { - return (rhs < lhs); - } - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator>=( const pair& lhs, const pair& rhs ) - { - return !(lhs < rhs); - } - - template< class T1, class T2 > - inline __host__ __device__ - std::pair make_pair( T1 t, T2 u ) - { - std::pair retval; - retval.first = t; - retval.second = u; - return retval; - } - -} // namespace nv_std - - - -/****************************************************************************** - * Implementations of C++ 11/14/17/... 
STL features - ******************************************************************************/ - -namespace nv_std { - -//----------------------------------------------------------------------------- -// Integral constant helper types -//----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) - - /// std::integral_constant - template - struct integral_constant; - - /// std::integral_constant - template - struct integral_constant - { - static const value_t value = V; - - typedef value_t value_type; - typedef integral_constant type; - - inline __host__ __device__ operator value_type() const - { - return value; - } - - inline __host__ __device__ const value_type operator()() const - { - return value; - } - }; - - -#else - - using std::integral_constant; - using std::pair; - -#endif - - /// The type used as a compile-time boolean with true value. - typedef integral_constant true_type; - - /// The type used as a compile-time boolean with false value. 
- typedef integral_constant false_type; - - -#if (!defined(_MSC_VER) && (__cplusplus < 201402L)) || (defined(_MSC_VER) && (_MSC_VER < 1900)) - - /// std::bool_constant - template - struct bool_constant : nv_std::integral_constant - {}; - -#else - - using std::bool_constant; - -#endif - - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1700)) - - /// std::nullptr_t - struct nullptr_t {}; - -#else - - using std::nullptr_t; - -#endif - - - - //----------------------------------------------------------------------------- - // Conditional metaprogramming - //----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600)) - - /// std::enable_if (true specialization) - template - struct enable_if { - typedef T type; - }; - - /// std::enable_if (false specialization) - template - struct enable_if { }; - - - /// std::conditional (true specialization) - template - struct conditional { typedef T type; }; - - /// std::conditional (false specialization) - template - struct conditional { typedef F type; }; - -#else - - using std::enable_if; - using std::conditional; - -#endif - - - - //----------------------------------------------------------------------------- - // Const/volatility specifiers - //----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) - - /// std::remove_const (non-const specialization) - template struct remove_const { typedef T type; }; - - /// std::remove_const (const specialization) - template struct remove_const { typedef T type; }; - - - - /// std::remove_volatile (non-volatile specialization) - template struct remove_volatile { typedef T type; }; - - /// std::remove_volatile (volatile specialization) - template struct remove_volatile { typedef T type; }; - - - - /// std::remove_cv - 
template - struct remove_cv { - typedef typename remove_volatile::type>::type type; - }; - -#else - - using std::remove_const; - using std::remove_volatile; - using std::remove_cv; - -#endif - - - //----------------------------------------------------------------------------- - // Type relationships - //----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) - - /// std::is_same (false specialization) - template - struct is_same : false_type - {}; - - /// std::is_same (true specialization) - template - struct is_same : true_type - {}; - - - /// Helper for std::is_base_of - template - struct is_base_of_helper - { - typedef char (&yes)[1]; - typedef char (&no)[2]; - - template - struct dummy - { - operator B*() const; - operator D*(); - }; - - template - static yes check(DerivedT*, T); - - static no check(BaseT*, int); - - static const bool value = sizeof(check(dummy(), int())) == sizeof(yes); - }; - - /// std::is_base_of - template - struct is_base_of : integral_constant< - bool, - (is_base_of_helper::type, typename remove_cv::type>::value) || - (is_same::type, typename remove_cv::type>::value)> - {}; - - -#else - - using std::is_same; - using std::is_base_of; - -#endif - - - - //----------------------------------------------------------------------------- - // Type properties - //----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) - - /// std::is_volatile - template struct is_volatile : false_type {}; - template struct is_volatile : true_type {}; - - - /// Helper for std::is_pointer (false specialization) - template struct is_pointer_helper : false_type {}; - - /// Helper for std::is_pointer (true specialization) - template struct is_pointer_helper : true_type {}; - - /// std::is_pointer - template struct is_pointer : 
is_pointer_helper::type> {}; - - - - /// std::is_void - template - struct is_void : is_same::type> - {}; - - - - /// std::is_integral - template struct is_integral : false_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template struct is_integral : is_integral {}; - template struct is_integral : is_integral {}; - template struct is_integral : is_integral {}; - - - - /// std::is_floating_point - template - struct is_floating_point : integral_constant< - bool, - (is_same::type>::value || - is_same::type>::value)> - {}; - - - - /// std::is_arithmetic - template - struct is_arithmetic : - integral_constant::value || is_floating_point::value)> - {}; - - - /// std::is_fundamental - template - struct is_fundamental : integral_constant< - bool, (is_arithmetic::value || - is_void::value || - is_same::type>::value)> - {}; - - - - -#else - - using std::is_volatile; - using std::is_pointer; - using std::is_void; - using std::is_integral; - using std::is_floating_point; - using std::is_arithmetic; - using std::is_fundamental; - -#endif - - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || \ - (defined(_MSC_VER) && (_MSC_VER < 1800)) || \ - (defined(__GNUG__) && (__GNUC__ < 5)) - - /** - * std::is_trivially_copyable - * - * This implementation only evaluates true if T is fundamental or pointer - * - * Without help from partial template specializations provided by the user for - * a specific class or struct, this trait will never report that the specified - * class or struct is 
trivially-copyable ; this is always safe, - * if possibly sub-optimal. - */ - template - struct is_trivially_copyable : - integral_constant::value || is_pointer::value)> - {}; - -#else - - using std::is_trivially_copyable; - -#endif - - - - - //----------------------------------------------------------------------------- - // Alignment and layout utilities - //----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) - - - /// std::alignment_of - template - struct alignment_of - { - struct pad - { - value_t val; - char byte; - }; - - enum - { - value = sizeof(pad) - sizeof(value_t) - }; - }; - -#else - - template - struct alignment_of : std::alignment_of {}; - -#endif - - /* 16B specializations where 32-bit Win32 host compiler disagrees with device compiler */ - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - - // Specializations for volatile/const qualified types - template struct alignment_of : alignment_of {}; - template struct alignment_of : alignment_of {}; - template struct alignment_of : alignment_of {}; - - - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800)) - - template struct aligned_chunk; - template<> struct __align__(1) aligned_chunk<1> { uint8_t buff; }; - template<> struct __align__(2) 
aligned_chunk<2> { uint16_t buff; }; - template<> struct __align__(4) aligned_chunk<4> { uint32_t buff; }; - template<> struct __align__(8) aligned_chunk<8> { uint32_t buff[2]; }; - template<> struct __align__(16) aligned_chunk<16> { uint32_t buff[4]; }; - template<> struct __align__(32) aligned_chunk<32> { uint32_t buff[8]; }; - template<> struct __align__(64) aligned_chunk<64> { uint32_t buff[16]; }; - template<> struct __align__(128) aligned_chunk<128> { uint32_t buff[32]; }; - template<> struct __align__(256) aligned_chunk<256> { uint32_t buff[64]; }; - template<> struct __align__(512) aligned_chunk<512> { uint32_t buff[128]; }; - template<> struct __align__(1024) aligned_chunk<1024> { uint32_t buff[256]; }; - template<> struct __align__(2048) aligned_chunk<2048> { uint32_t buff[512]; }; - template<> struct __align__(4096) aligned_chunk<4096> { uint32_t buff[1024]; }; - - /// std::aligned_storage - template - struct aligned_storage - { - typedef aligned_chunk type[Len / sizeof(aligned_chunk)]; - }; - -#else - - using std::aligned_storage; - -#endif - - - - -}; // namespace nv_std - diff --git a/cutlass/util/platform.h b/cutlass/util/platform.h new file mode 100644 index 00000000..32c41a67 --- /dev/null +++ b/cutlass/util/platform.h @@ -0,0 +1,801 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#pragma once + +/** + * \file + * \brief C++ features that may be otherwise unimplemented for CUDA device functions. + * + * This file has four components: + * + * (1) Macros: + * - Empty macro defines for C++ keywords not supported by the current + * version of C++. These simply allow compilation to proceed (but do + * not provide the added semantics). + * - \p noexcept + * - \p constexpr + * - \p nullptr + * - \p static_assert + * + * - Macro functions that we need in constant expressions because the + * C++ equivalents require constexpr compiler support. These are + * prefixed with \p __NV_STD_* + * - \p __NV_STD_MAX + * - \p __NV_STD_MIN + * + * (2) Re-implementations of STL functions and types: + * - C++ features that need the \p __device__ annotation. These are + * placed into the \p platform namespace. 
+ * - \p plus + * - \p less + * - \p greater + * - \p min + * - \p max + * - \p methods on std::pair (==, !=, <, <=, >, >=, and make_pair()) + * + * (3) Stop-gap implementations of unsupported STL functions and types: + * - STL functions and types defined by C++ 11/14/17/etc. that are not + * provided by the current version of C++. These are placed into the + * \p platform namespace + * - \p integral_constant + * - \p nullptr_t + * - \p true_type + * - \p false_type + * - \p bool_constant + * - \p enable_if + * - \p conditional + * - \p is_same + * - \p is_base_of + * - \p remove_const + * - \p remove_volatile + * - \p remove_cv + * - \p is_volatile + * - \p is_pointer + * - \p is_void + * - \p is_integral + * - \p is_floating_point + * - \p is_arithmetic + * - \p is_fundamental + * - \p is_trivially_copyable + * - \p alignment_of + * - \p aligned_storage + * + * (4) Functions and types that are STL-like (but aren't in the STL): + * - \p TODO: min and max functors? + * + * The idea is that, as we drop support for older compilers, we can simply #define + * the \p __NV_STD_XYZ macros and \p platform namespace to alias their C++ + * counterparts (or trivially find-and-replace their occurrences in code text). 
+ */ + +//----------------------------------------------------------------------------- +// Dependencies +//----------------------------------------------------------------------------- + +#include + +#if !defined(__CUDACC_RTC__) +//----------------------------------------------------------------------------- +// Include STL files that platform provides functionality for +//----------------------------------------------------------------------------- + +#include // Minimum/maximum operations +#include // nullptr_t +#include // Arithmetic operations +#include // For methods on std::pair +#if (!defined(_MSC_VER) && (__cplusplus >= 201103L)) || (defined(_MSC_VER) && (_MS_VER >= 1500)) +#include // For integral constants, conditional metaprogramming, and type traits +#endif + +#include + +#endif +/****************************************************************************** + * Macros + ******************************************************************************/ +//----------------------------------------------------------------------------- +// Keywords +//----------------------------------------------------------------------------- + +/// noexcept, constexpr +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1900)) +#ifndef noexcept +#define noexcept +#endif +#ifndef constexpr +#define constexpr +#endif +#endif + +/// nullptr +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1310)) +#ifndef nullptr +#define nullptr 0 +#endif +#endif + +/// static_assert +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600)) +#ifndef static_assert +#define __platform_cat_(a, b) a##b +#define __platform_cat(a, b) __platform_cat_(a, b) +#define static_assert(__e, __m) typedef int __platform_cat(AsSeRt, __LINE__)[(__e) ? 
1 : -1] +#endif +#endif + +//----------------------------------------------------------------------------- +// Functions +//----------------------------------------------------------------------------- + +/// Select maximum(a, b) +#ifndef __NV_STD_MAX +#define __NV_STD_MAX(a, b) (((b) > (a)) ? (b) : (a)) +#endif + +/// Select minimum(a, b) +#ifndef __NV_STD_MIN +#define __NV_STD_MIN(a, b) (((b) < (a)) ? (b) : (a)) +#endif + +/****************************************************************************** + * Re-implementations + ******************************************************************************/ +namespace cutlass { +namespace platform { + +//----------------------------------------------------------------------------- +// Arithmetic operations, comparisons +//----------------------------------------------------------------------------- + +/// platform::plus +template +struct plus { + CUTLASS_HOST_DEVICE constexpr T operator()(const T& lhs, const T& rhs) const { return lhs + rhs; } +}; + +/// std::less +template +struct less { + CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const { + return lhs < rhs; + } +}; + +/// std::greater +template +struct greater { + CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const { + return lhs > rhs; + } +}; + +//----------------------------------------------------------------------------- +// Minimum/maximum operations +//----------------------------------------------------------------------------- + +/// std::min +template +CUTLASS_HOST_DEVICE constexpr const T& min(const T& a, const T& b) { + return (b < a) ? b : a; +} + +/// std::max +template +CUTLASS_HOST_DEVICE constexpr const T& max(const T& a, const T& b) { + return (a < b) ? 
b : a; +} + +#if !defined(__CUDACC_RTC__) +//----------------------------------------------------------------------------- +// Methods on std::pair +//----------------------------------------------------------------------------- + +using std::pair; + +template +CUTLASS_HOST_DEVICE constexpr bool operator==(const pair& lhs, const pair& rhs) { + return (lhs.first == rhs.first) && (lhs.second == rhs.second); +} + +/// Inequality on std::pair: true when either member differs, i.e. the exact negation of operator== +template +CUTLASS_HOST_DEVICE constexpr bool operator!=(const pair& lhs, const pair& rhs) { + return (lhs.first != rhs.first) || (lhs.second != rhs.second); +} + +template +CUTLASS_HOST_DEVICE constexpr bool operator<(const pair& lhs, const pair& rhs) { + return (lhs.first < rhs.first) ? true : (rhs.first < lhs.first) ? false + : (lhs.second < rhs.second); +} + +template +CUTLASS_HOST_DEVICE constexpr bool operator<=(const pair& lhs, const pair& rhs) { + return !(rhs < lhs); +} + +template +CUTLASS_HOST_DEVICE constexpr bool operator>(const pair& lhs, const pair& rhs) { + return (rhs < lhs); +} + +template +CUTLASS_HOST_DEVICE constexpr bool operator>=(const pair& lhs, const pair& rhs) { + return !(lhs < rhs); +} + +template +CUTLASS_HOST_DEVICE std::pair make_pair(T1 t, T2 u) { + std::pair retval; + retval.first = t; + retval.second = u; + return retval; +} +#endif + +} // namespace platform + +/****************************************************************************** + * Implementations of C++ 11/14/17/... 
STL features + ******************************************************************************/ + +namespace platform { + +//----------------------------------------------------------------------------- +// Integral constant helper types +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) + +/// std::integral_constant +template +struct integral_constant; + +/// std::integral_constant +template +struct integral_constant { + static const value_t value = V; + + typedef value_t value_type; + typedef integral_constant type; + + CUTLASS_HOST_DEVICE operator value_type() const { return value; } + + CUTLASS_HOST_DEVICE const value_type operator()() const { return value; } +}; + +#else + +using std::integral_constant; +using std::pair; + +#endif + +/// The type used as a compile-time boolean with true value. +typedef integral_constant true_type; + +/// The type used as a compile-time boolean with false value. 
+typedef integral_constant false_type; + +#if (!defined(_MSC_VER) && (__cplusplus < 201402L)) || (defined(_MSC_VER) && (_MSC_VER < 1900)) + +/// std::bool_constant +template +struct bool_constant : platform::integral_constant {}; + +#else + +using std::bool_constant; + +#endif + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1700)) + +/// std::nullptr_t +struct nullptr_t {}; + +#else + +using std::nullptr_t; + +#endif + +//----------------------------------------------------------------------------- +// Conditional metaprogramming +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600)) + +/// std::enable_if (true specialization) +template +struct enable_if { + typedef T type; +}; + +/// std::enable_if (false specialization) +template +struct enable_if {}; + +/// std::conditional (true specialization) +template +struct conditional { + typedef T type; +}; + +/// std::conditional (false specialization) +template +struct conditional { + typedef F type; +}; + +#else + +using std::enable_if; +using std::conditional; + +#endif + +//----------------------------------------------------------------------------- +// Const/volatility specifiers +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) + +/// std::remove_const (non-const specialization) +template +struct remove_const { + typedef T type; +}; + +/// std::remove_const (const specialization) +template +struct remove_const { + typedef T type; +}; + +/// std::remove_volatile (non-volatile specialization) +template +struct remove_volatile { + typedef T type; +}; + +/// std::remove_volatile (volatile specialization) +template +struct remove_volatile { + typedef T type; +}; + +/// std::remove_cv +template +struct remove_cv { + typedef 
typename remove_volatile::type>::type type; +}; + +#else + +using std::remove_const; +using std::remove_volatile; +using std::remove_cv; + +#endif + +//----------------------------------------------------------------------------- +// Type relationships +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) + +/// std::is_same (false specialization) +template +struct is_same : false_type {}; + +/// std::is_same (true specialization) +template +struct is_same : true_type {}; + +/// Helper for std::is_base_of +template +struct is_base_of_helper { + typedef char (&yes)[1]; + typedef char (&no)[2]; + + template + struct dummy { + CUTLASS_HOST_DEVICE operator B*() const; + CUTLASS_HOST_DEVICE operator D*(); + }; + + template + CUTLASS_HOST_DEVICE static yes check(DerivedT*, T); + + CUTLASS_HOST_DEVICE static no check(BaseT*, int); + + static const bool value = sizeof(check(dummy(), int())) == sizeof(yes); +}; + +/// std::is_base_of +template +struct is_base_of + : integral_constant::type, + typename remove_cv::type>::value) || + (is_same::type, + typename remove_cv::type>::value)> {}; + +#else + +using std::is_same; +using std::is_base_of; + +#endif + +//----------------------------------------------------------------------------- +// Type properties +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) + +/// std::is_volatile +template +struct is_volatile : false_type {}; +template +struct is_volatile : true_type {}; + +/// Helper for std::is_pointer (false specialization) +template +struct is_pointer_helper : false_type {}; + +/// Helper for std::is_pointer (true specialization) +template +struct is_pointer_helper : true_type {}; + +/// std::is_pointer +template +struct is_pointer : is_pointer_helper::type> {}; + +/// 
std::is_void +template +struct is_void : is_same::type> {}; + +/// std::is_integral +template +struct is_integral : false_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template +struct is_integral : is_integral {}; +template +struct is_integral : is_integral {}; +template +struct is_integral : is_integral {}; + +/// std::is_floating_point +template +struct is_floating_point + : integral_constant::type>::value || + is_same::type>::value)> {}; + +/// std::is_arithmetic +template +struct is_arithmetic + : integral_constant::value || is_floating_point::value)> {}; + +/// std::is_fundamental +template +struct is_fundamental + : integral_constant::value || is_void::value || + is_same::type>::value)> {}; + +#else + +using std::is_volatile; +using std::is_pointer; +using std::is_void; +using std::is_integral; +using std::is_floating_point; +using std::is_arithmetic; +using std::is_fundamental; + +#endif + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \ + (defined(__GNUG__) && (__GNUC__ < 5)) + +/** + * std::is_trivially_copyable + * + * This implementation only evaluates true if T is fundamental or pointer + * + * Without help from partial template specializations provided by the user for + * a specific class or struct, this trait will never report that the specified + * class or struct is trivially-copyable ; this is always safe, + * if possibly sub-optimal. 
+ */ +template +struct is_trivially_copyable + : integral_constant::value || is_pointer::value)> {}; + +#else + +using std::is_trivially_copyable; + +#endif + +//----------------------------------------------------------------------------- +// Alignment and layout utilities +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) + +/// std::alignment_of +template +struct alignment_of { + struct pad { + value_t val; + char byte; + }; + + enum { value = sizeof(pad) - sizeof(value_t) }; +}; + +#else + +template +struct alignment_of : std::alignment_of {}; + +#endif + +/* 16B specializations where 32-bit Win32 host compiler disagrees with device compiler */ +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; + +// Specializations for volatile/const qualified types +template +struct alignment_of : alignment_of {}; +template +struct alignment_of : alignment_of {}; +template +struct alignment_of : alignment_of {}; + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800)) + +template +struct aligned_chunk; +template <> +struct __align__(1) aligned_chunk<1> { + uint8_t buff; +}; +template <> +struct __align__(2) aligned_chunk<2> { + uint16_t buff; +}; +template <> +struct 
__align__(4) aligned_chunk<4> { + uint32_t buff; +}; +template <> +struct __align__(8) aligned_chunk<8> { + uint32_t buff[2]; +}; +template <> +struct __align__(16) aligned_chunk<16> { + uint32_t buff[4]; +}; +template <> +struct __align__(32) aligned_chunk<32> { + uint32_t buff[8]; +}; +template <> +struct __align__(64) aligned_chunk<64> { + uint32_t buff[16]; +}; +template <> +struct __align__(128) aligned_chunk<128> { + uint32_t buff[32]; +}; +template <> +struct __align__(256) aligned_chunk<256> { + uint32_t buff[64]; +}; +template <> +struct __align__(512) aligned_chunk<512> { + uint32_t buff[128]; +}; +template <> +struct __align__(1024) aligned_chunk<1024> { + uint32_t buff[256]; +}; +template <> +struct __align__(2048) aligned_chunk<2048> { + uint32_t buff[512]; +}; +template <> +struct __align__(4096) aligned_chunk<4096> { + uint32_t buff[1024]; +}; + +/// std::aligned_storage +template +struct aligned_storage { + typedef aligned_chunk type[Len / sizeof(aligned_chunk)]; +}; + +#else + +using std::aligned_storage; + +#endif + +#if !defined(__CUDACC_RTC__) +/// Default deleter +template +struct default_delete { + void operator()(T* ptr) const { delete ptr; } +}; + +/// Partial specialization for deleting array types +template +struct default_delete { + void operator()(T* ptr) const { delete[] ptr; } +}; + +/// std::unique_ptr +template > +class unique_ptr { + public: + typedef T* pointer; + typedef T element_type; + typedef Deleter deleter_type; + + private: + /// Pointer to memory + pointer _ptr; + + /// Deleter + deleter_type _deleter; + + public: + unique_ptr() : _ptr(nullptr) {} + unique_ptr(pointer p) : _ptr(p) {} + + ~unique_ptr() { + if (_ptr) { + _deleter(_ptr); + } + } + /// Returns a pointer to the managed object or nullptr if no object is owned. 
+ pointer get() const noexcept { return _ptr; } + + /// Releases ownership of the managed object, if any + pointer release() noexcept { + pointer p(_ptr); + _ptr = nullptr; + return p; + } + + /// Replaces the managed object, deleting the old object. + void reset(pointer p = pointer()) noexcept { + pointer old_ptr = _ptr; + _ptr = p; + if (old_ptr != nullptr) { + get_deleter()(old_ptr); + } + } + + /// Swaps the managed objects with *this and another unique_ptr + void swap(unique_ptr& other) noexcept { std::swap(_ptr, other._ptr); } + + /// Returns the deleter object + Deleter& get_deleter() noexcept { return _deleter; } + + /// Returns the deleter object + Deleter const& get_deleter() const noexcept { return _deleter; } + + /// Checks whether an object is owned + operator bool() const noexcept { return _ptr != nullptr; } + + /// Dereferences the unique_ptr + T& operator*() const { return *_ptr; } + + /// Returns a pointer to the managed object + pointer operator->() const noexcept { return _ptr; } + + /// Array access to managed object + T& operator[](size_t i) const { return _ptr[i]; } +}; + +/// Specializes the swap algorithm +template +void swap(unique_ptr& lhs, unique_ptr& rhs) noexcept { + lhs.swap(rhs); +} +#endif + +}; // namespace platform +}; // namespace cutlass diff --git a/cutlass/util/printable.h b/cutlass/util/printable.h deleted file mode 100644 index dd7bda40..00000000 --- a/cutlass/util/printable.h +++ /dev/null @@ -1,72 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Pure virtual base class for printable types - */ - -#include - - -namespace cutlass { - - -/****************************************************************************** - * printable_t - ******************************************************************************/ - -/** - * Pure virtual base class for printable types - */ -struct printable_t -{ - /// Returns the instance as a string - __host__ __device__ inline - virtual char const* to_string() const = 0; - - /// Insert the formatted instance into the output stream - virtual void print(std::ostream& out) const = 0; - - /// Destructor - virtual ~printable_t() {} -}; - - -/// Insert the formatted \p printable into the output stream -std::ostream& operator<<( - std::ostream& out, - printable_t const& printable) -{ - printable.print(out); - return out; -} - - -} // namespace cutlass diff --git a/cutlass/util/util.h b/cutlass/util/util.h deleted file mode 100644 index e4247ccd..00000000 --- a/cutlass/util/util.h +++ /dev/null @@ -1,82 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Umbrella header file for utilities - */ - -#include "debug.h" -#include "device_introspection.h" -#include "io_intrinsics.h" -#include "math.h" -#include "nv_std.h" -#include "printable.h" -#include "matrix_transform.h" - - - -namespace cutlass { - - -/****************************************************************************** - * int_constant - ******************************************************************************/ - -/** - * Shorthand for nv_std::integral_constant of int32_t type - */ -template -struct int_constant : nv_std::integral_constant -{}; - - -/****************************************************************************** - * Uninitialized - ******************************************************************************/ - -/** - * \brief A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions - */ -template -struct __align__(16) uninitialized -{ - /// Backing storage - uint8_t storage[sizeof(T)]; - - /// Alias - __host__ __device__ __forceinline__ T& alias() - { - return reinterpret_cast(*this); - } -}; - - 
- -} // namespace cutlass diff --git a/cutlass/vector.h b/cutlass/vector.h new file mode 100644 index 00000000..a66dfdef --- /dev/null +++ b/cutlass/vector.h @@ -0,0 +1,229 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! 
\file + \brief Defines a 1D vector of elements held in the registers of each thread. +*/ +#pragma once + +#if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) +#include +#endif + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct AlignedStruct {}; + +template <> +struct __align__(1) AlignedStruct<1>{}; +template <> +struct __align__(2) AlignedStruct<2>{}; +template <> +struct __align__(4) AlignedStruct<4>{}; +template <> +struct __align__(8) AlignedStruct<8>{}; +template <> +struct __align__(16) AlignedStruct<16>{}; +template <> +struct __align__(32) AlignedStruct<32>{}; +template <> +struct __align__(64) AlignedStruct<64>{}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +union Vector { + /// The scalar type. + typedef Scalar_ Scalar; + + /// The number of elements in the vector. + enum { kLanes = kLanes_ }; + /// The size of the vector. + enum { kVectorSize = kLanes * (int)sizeof(Scalar) }; + /// The number of registers needed to store the vector. + enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 }; + + // Make sure that the vector type makes sense. + static_assert(kVectorSize <= 16, "Vector type is too large"); + + /// The aligned storage to make sure we have good alignment. + AlignedStruct aligned_; + /// The associated array of scalars. + Scalar scalars[kLanes]; + /// The data in registers. + uint32_t registers[kRegisters]; + + /// Accessor to the ith lane. + CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const { return scalars[i]; } + /// Accessor to the ith lane. + CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return scalars[i]; } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) + +template +union Vector { + /// The scalar type. 
+ typedef half Scalar; + + /// The number of elements in the vector. + enum { kLanes = kLanes_ }; + /// The size of the vector. + enum { kVectorSize = kLanes * (int)sizeof(Scalar) }; + /// The number of registers needed to store the vector. + enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 }; + + // Make sure that the vector type makes sense. + static_assert(kVectorSize <= size_t(16), "Vector type is too large"); + + /// The aligned storage to make sure we have good alignment. + AlignedStruct aligned_; + /// The associated array of scalars. + uint16_t scalars[kLanes]; + /// The data in registers. + uint32_t registers[kRegisters]; + + /// Accessor to the ith lane. + CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const { + return reinterpret_cast(scalars[i]); + } + /// Accessor to the ith lane. + CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return reinterpret_cast(scalars[i]); } +}; + +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +CUTLASS_DEVICE void make_zero(Scalar_& x) { + x = Scalar_(0); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Vectorize { + typedef Vector Type; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Vectorize { + typedef Element_ Type; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +CUTLASS_DEVICE void make_zero(Vector& vec) { + for (int i = 0; i < Vector::kRegisters; ++i) { + vec.registers[i] = 0; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// cutlass::Extent similar to std::extent but applicable to CUTLASS types +// + +/// Returns the extent of a scalar or vector +template +struct Extent { + static size_t const kValue = 1; +}; + +/// Returns the number 
of lanes of a vector if need be +template +struct Extent > { + static size_t const kValue = Lanes; +}; + +/// Returns the number of lanes of a vector if need be +template +struct Extent const> { + static size_t const kValue = Lanes; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Traits describing properties of vectors and scalar-as-vectors +template +struct VectorTraits { + /// Scalar type + typedef T Scalar; + + /// Number of lanes of vector + static int const kLanes = 1; + + /// True if the type is actually a cutlass::Vector, otherwise false + static bool const IsVector = false; + + /// Type that is always a vector + typedef Vector Vector; +}; + +/// Partial specialization for actual cutlass::Vector +template +struct VectorTraits > { + /// Scalar type + typedef T Scalar; + + /// Number of lanes of vector + static int const kLanes = Lanes; + + /// Type is actually a cutlass::Vector + static bool const IsVector = true; + + /// Type that is always a Vector + typedef Vector Vector; +}; + +/// Partial specialization for actual cutlass::Vector +template +struct VectorTraits const> { + /// Scalar type + typedef T Scalar; + + /// Number of lanes of vector + static int const kLanes = Lanes; + + /// Type is actually a cutlass::Vector + static bool const IsVector = true; + + /// Type that is always a Vector + typedef Vector Vector; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/wmma_matrix.h b/cutlass/wmma_matrix.h new file mode 100644 index 00000000..c4d8a0b5 --- /dev/null +++ b/cutlass/wmma_matrix.h @@ -0,0 +1,193 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Abstractions for loading and storing matrices using the CUDA WMMA API. +*/ +#pragma once + +#if defined(__CUDACC__) && (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700) + +// Dependent header files should use the following macro to guard all code using +// nvcuda::wmma:: to enable compilation for CUDA Compute Capabilities < sm_70. +// Earlier shader models not support Tensor Cores. 
+#define CUTLASS_USE_WMMA_API + +#include "stdio.h" + +#include +#include +#include +#include +#include +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Statically maps cutlass::MatrixLayout => nvcuda::wmma layout tags +template +struct WmmaLayout { + typedef nvcuda::wmma::col_major Layout; +}; + +/// Statically maps cutlass::MatrixLayout => nvcuda::wmma layout tags +template <> +struct WmmaLayout { + typedef nvcuda::wmma::row_major Layout; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Adapter to nvcuda::wmma fragment load and store operations +template +struct WmmaMatrix {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Adapter to nvcuda::wmma fragment accessors for A operand +template +struct WmmaMatrix + : public nvcuda::wmma::fragment< + /// The nvcuda::wmma operand name. + nvcuda::wmma::matrix_a, + /// The dimensions. + WmmaShape_::kW, + WmmaShape_::kH, + WmmaShape_::kD, + /// The scalar. + Scalar_, + /// The layout. + typename WmmaLayout::Layout> { + /// This type. + typedef WmmaMatrix This_; + + /// Fill-in the element. + CUTLASS_DEVICE This_& operator=(Scalar_ const& x) { + nvcuda::wmma::fill_fragment(*this, x); + return *this; + } + + /// Load from memory. + CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) { + nvcuda::wmma::load_matrix_sync(*this, pointer, stride); + } + + /// Store to memory. + CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const { + nvcuda::wmma::store_matrix_sync(pointer, *this, stride); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Adapter to nvcuda::wmma fragment accessors for B operand +template +struct WmmaMatrix + : public nvcuda::wmma::fragment< + /// The nvcuda::wmma operand name. 
+ nvcuda::wmma::matrix_b, + /// The dimensions. + WmmaShape_::kW, + WmmaShape_::kH, + WmmaShape_::kD, + /// The scalar. + Scalar_, + /// The layout. + typename WmmaLayout::Layout> { + /// This type. + typedef WmmaMatrix This_; + + /// Fill-in the element. + CUTLASS_DEVICE This_& operator=(Scalar_ const& x) { + nvcuda::wmma::fill_fragment(*this, x); + return *this; + } + + /// Load from memory. + CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) { + nvcuda::wmma::load_matrix_sync(*this, pointer, stride); + } + + /// Store to memory. + CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const { + nvcuda::wmma::store_matrix_sync(pointer, *this, stride); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Adapter to nvcuda::wmma fragment accessors for C operand +template +struct WmmaMatrix + : public nvcuda::wmma::fragment< + /// The nvcuda::wmma operand name. + nvcuda::wmma::accumulator, + /// The dimensions. + WmmaShape_::kW, + WmmaShape_::kH, + WmmaShape_::kD, + /// The scalar. + Scalar_> { + /// This type. + typedef WmmaMatrix This_; + /// The layout. + static MatrixLayout::Kind const kLayout = kLayout_; + + /// Fill-in the element. + CUTLASS_DEVICE This_& operator=(Scalar_ const& x) { + nvcuda::wmma::fill_fragment(*this, x); + return *this; + } + + /// Load from memory. + CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) { + bool const kIsRowMajor = kLayout == MatrixLayout::kRowMajor; + nvcuda::wmma::load_matrix_sync( + *this, + pointer, + stride, + kIsRowMajor ? nvcuda::wmma::mem_row_major : nvcuda::wmma::mem_col_major); + } + + /// Store to memory. + CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const { + bool const kIsRowMajor = kLayout == MatrixLayout::kRowMajor; + nvcuda::wmma::store_matrix_sync( + pointer, + *this, + stride, + kIsRowMajor ? 
nvcuda::wmma::mem_row_major : nvcuda::wmma::mem_col_major); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass + +#endif // defined CUTLASS_USE_WMMA_API diff --git a/cutlass_test/.gitignore b/cutlass_test/.gitignore deleted file mode 100644 index 5628abb9..00000000 --- a/cutlass_test/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -/bin/ -/gemm-GPU.csv -/gemm-REF.csv -/a.csv -/b.csv -/gp100_schmoo/ -/ignore/ diff --git a/cutlass_test/Makefile b/cutlass_test/Makefile deleted file mode 100644 index 8b4b87ee..00000000 --- a/cutlass_test/Makefile +++ /dev/null @@ -1,180 +0,0 @@ -#/****************************************************************************** -# * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. -# * -# * Redistribution and use in source and binary forms, with or without -# * modification, are permitted provided that the following conditions are met: -# * * Redistributions of source code must retain the above copyright -# * notice, this list of conditions and the following disclaimer. -# * * Redistributions in binary form must reproduce the above copyright -# * notice, this list of conditions and the following disclaimer in the -# * documentation and/or other materials provided with the distribution. -# * * Neither the name of the NVIDIA CORPORATION nor the -# * names of its contributors may be used to endorse or promote products -# * derived from this software without specific prior written permission. -# * -# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY -# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# * -# ******************************************************************************/ - - -#------------------------------------------------------------------------------- -# -# Makefile usage -# -# make sm= [transpose=] [verbose=<0*|1>] [keep=<0*|1>] -# -# * : default -# -#------------------------------------------------------------------------------- - -TEST_DIR := $(dir $(lastword $(MAKEFILE_LIST))) - -include ../common.mk - - -#------------------------------------------------------------------------------- -# Commandline Options -#------------------------------------------------------------------------------- - -ifdef transpose - TRANSPOSE := $(transpose) -else - TRANSPOSE := nn -endif - -# If defined, GEMMs only compiled with specified alignment restrictions on A and B -# matrices. Otherwise, kernels are compiled for all feasible alignment options, and -# the appropriate kernel is selected. -ifdef alignment - DEFINES += -DGEMM_ALIGNMENT=$(alignment) -endif - -# If defined as false, ragged handling can be disabled. 
-ifdef ragged - DEFINES += -DGEMM_RAGGED=$(ragged) -endif - -#------------------------------------------------------------------------------- -# Include and Library paths -#------------------------------------------------------------------------------- - -INC += -I$(TEST_DIR) -INC += -I$(BASE_DIR) - -LIBS += -lcublas - -#------------------------------------------------------------------------------- -# Preprocessor definitions -#------------------------------------------------------------------------------- - -ifeq (nt, $(TRANSPOSE)) - DEFINES += -DTRANSPOSE_B -else ifeq (tn, $(TRANSPOSE)) - DEFINES += -DTRANSPOSE_A - -else ifeq (tt, $(TRANSPOSE)) - DEFINES += -DTRANSPOSE_A - DEFINES += -DTRANSPOSE_B -endif - -NVCCFLAGS += -std=c++11 - - -#------------------------------------------------------------------------------- -# Dependency Lists -#------------------------------------------------------------------------------- - -DEPS := $(call rwildcard, $(BASE_DIR),*.h) \ - $(call rwildcard, $(BASE_DIR)cgl,*.h) \ - $(BASE_DIR)common.mk \ - $(TEST_DIR)Makefile - - -ALL := sgemm \ - dgemm \ - hgemm \ - igemm - - -#------------------------------------------------------------------------------- -# make default -#------------------------------------------------------------------------------- - -default: - - -#------------------------------------------------------------------------------- -# make clean -#------------------------------------------------------------------------------- - -clean : - rm -f bin/* - rm -f *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx *.hash *.cu.cpp *.o *.obj* *dlink.* *.res *.fatbin *.module_id - - -#------------------------------------------------------------------------------- -# make all -#------------------------------------------------------------------------------- - -all : $(ALL) - - -#------------------------------------------------------------------------------- -# make sgemm 
-#------------------------------------------------------------------------------- - -sgemm: bin/sgemm_$(TRANSPOSE)_$(BIN_SUFFIX) - -bin/sgemm_$(TRANSPOSE)_$(BIN_SUFFIX) : gemm.cu $(DEPS) - mkdir -p bin - $(NVCC) -DTEST_SGEMM $(DEFINES) $(SM_TARGETS) -o $@ gemm.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBINC) $(LIBS) - -#------------------------------------------------------------------------------- -# make dgemm -#------------------------------------------------------------------------------- - -dgemm: bin/dgemm_$(TRANSPOSE)_$(BIN_SUFFIX) - -bin/dgemm_$(TRANSPOSE)_$(BIN_SUFFIX) : gemm.cu $(DEPS) - mkdir -p bin - $(NVCC) -DTEST_DGEMM $(DEFINES) $(SM_TARGETS) -o $@ gemm.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBINC) $(LIBS) - -#------------------------------------------------------------------------------- -# make hgemm -#------------------------------------------------------------------------------- - -hgemm: bin/hgemm_$(TRANSPOSE)_$(BIN_SUFFIX) - -bin/hgemm_$(TRANSPOSE)_$(BIN_SUFFIX) : gemm.cu $(DEPS) - mkdir -p bin - $(NVCC) -DTEST_HGEMM $(DEFINES) $(SM_TARGETS) -o $@ gemm.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBINC) $(LIBS) - -#------------------------------------------------------------------------------- -# make igemm -#------------------------------------------------------------------------------- - -igemm: bin/igemm_$(TRANSPOSE)_$(BIN_SUFFIX) - -bin/igemm_$(TRANSPOSE)_$(BIN_SUFFIX) : gemm.cu $(DEPS) - mkdir -p bin - $(NVCC) -DTEST_IGEMM $(DEFINES) $(SM_TARGETS) -o $@ gemm.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBINC) $(LIBS) - -#------------------------------------------------------------------------------- -# make wgemm -#------------------------------------------------------------------------------- - -wgemm: bin/wgemm_$(TRANSPOSE)_$(BIN_SUFFIX) - -bin/wgemm_$(TRANSPOSE)_$(BIN_SUFFIX) : gemm.cu $(DEPS) - mkdir -p bin - $(NVCC) -DTEST_WGEMM -DWMMA $(DEFINES) $(SM_TARGETS) -o $@ gemm.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBINC) $(LIBS) - diff --git 
a/cutlass_test/cublas_dispatch.h b/cutlass_test/cublas_dispatch.h deleted file mode 100644 index 9b21926e..00000000 --- a/cutlass_test/cublas_dispatch.h +++ /dev/null @@ -1,300 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * C++ interface for dispatching CUBLAS GEMM calls - */ - -#include - -namespace cutlass { - - -/****************************************************************************** - * cuBLAS dispatch entrypoints - ******************************************************************************/ - -/** - * Dispatch cuBLAS igemm - */ -cublasStatus_t cublas_gemm_dispatch( - cublasHandle_t cublas_handle, ///< CUBLAS handle - cublasOperation_t transform_a, ///< Transform op(A) that is non- or (conj.) transpose. - cublasOperation_t transform_b, ///< Transform op(B) that is non- or (conj.) transpose. - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - int32_t alpha, ///< Scalar used for multiplicands - int8_t *d_a, ///< Device pointer to matrix A array values - int8_t *d_b, ///< Device pointer to matrix B array values - int32_t beta, ///< Scalar used for addend - int32_t *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. -{ - return cublasGemmEx( - cublas_handle, - transform_a, - transform_b, - m, - n, - k, - (void*) &alpha, - (void*) d_a, - CUDA_R_8I, - (transform_a == CUBLAS_OP_N) ? m : k, - (void*) d_b, - CUDA_R_8I, - (transform_b == CUBLAS_OP_N) ? k : n, - (void*) &beta, - (void*) d_c, - CUDA_R_32I, - m, - CUDA_R_32I, - CUBLAS_GEMM_DFALT); -} - - -/** - * Dispatch cuBLAS hgemm - */ -cublasStatus_t cublas_gemm_dispatch( - cublasHandle_t cublas_handle, ///< CUBLAS handle - cublasOperation_t transform_a, ///< Transform op(A) that is non- or (conj.) transpose. - cublasOperation_t transform_b, ///< Transform op(B) that is non- or (conj.) 
transpose. - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - __half alpha, ///< Scalar used for multiplicands - __half *d_a, ///< Device pointer to matrix A array values - __half *d_b, ///< Device pointer to matrix B array values - __half beta, ///< Scalar used for addend - __half *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. -{ - return cublasHgemm( - cublas_handle, transform_a, transform_b, - m, n, k, - &alpha, - d_a, - (transform_a == CUBLAS_OP_N) ? m : k, - d_b, - (transform_b == CUBLAS_OP_N) ? k : n, - &beta, - d_c, - m); - -} - - -/** - * Dispatch cuBLAS sgemm - */ -cublasStatus_t cublas_gemm_dispatch( - cublasHandle_t cublas_handle, ///< CUBLAS handle - cublasOperation_t transform_a, ///< Transform op(A) that is non- or (conj.) transpose. - cublasOperation_t transform_b, ///< Transform op(B) that is non- or (conj.) transpose. - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - float alpha, ///< Scalar used for multiplicands - float *d_a, ///< Device pointer to matrix A array values - float *d_b, ///< Device pointer to matrix B array values - float beta, ///< Scalar used for addend - float *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. -{ - return cublasSgemm( - cublas_handle, transform_a, transform_b, - m, n, k, - &alpha, - d_a, - (transform_a == CUBLAS_OP_N) ? m : k, - d_b, - (transform_b == CUBLAS_OP_N) ? 
k : n, - &beta, - d_c, - m); -} - - -/** - * Dispatch cuBLAS dgemm - */ -cublasStatus_t cublas_gemm_dispatch( - cublasHandle_t cublas_handle, ///< CUBLAS handle - cublasOperation_t transform_a, ///< Transform op(A) that is non- or (conj.) transpose. - cublasOperation_t transform_b, ///< Transform op(B) that is non- or (conj.) transpose. - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - double alpha, ///< Scalar used for multiplicands - double *d_a, ///< Device pointer to matrix A array values - double *d_b, ///< Device pointer to matrix B array values - double beta, ///< Scalar used for addend - double *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. -{ - return cublasDgemm( - cublas_handle, transform_a, transform_b, - m, n, k, - &alpha, - d_a, (transform_a == CUBLAS_OP_N) ? m : k, - d_b, (transform_b == CUBLAS_OP_N) ? k : n, - &beta, - d_c, m); -} - -/** - * Dispatch cuBLAS Tensor Cores GEMM - */ -cublasStatus_t cublas_gemm_dispatch( - cublasHandle_t cublas_handle, ///< CUBLAS handle - cublasOperation_t transform_a, ///< Transform op(A) that is non- or (conj.) transpose. - cublasOperation_t transform_b, ///< Transform op(B) that is non- or (conj.) transpose. 
- int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - float alpha, ///< Scalar used for multiplicands - half *d_a, ///< Device pointer to matrix A array values - half *d_b, ///< Device pointer to matrix B array values - float beta, ///< Scalar used for addend - float *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. -{ - return cublasGemmEx( - cublas_handle, - transform_a, - transform_b, - m, - n, - k, - (void*) &alpha, - (void*) d_a, - CUDA_R_16F, - (transform_a == CUBLAS_OP_N) ? m : k, - (void*) d_b, - CUDA_R_16F, - (transform_b == CUBLAS_OP_N) ? k : n, - (void*) &beta, - (void*) d_c, - CUDA_R_32F, - m, - CUDA_R_32F, - CUBLAS_GEMM_DFALT_TENSOR_OP); -} - - -/** - * Uses cuBLAS to compute gemm on device matrices (unspecialized) - */ -template < - gemm::tiling_strategy::kind_t _TilingStrategy, ///< Tile-sizing classification category - math_operation_class_t _math_op, - matrix_transform_t::kind_t _TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t _TransformB, ///< Transformation op for matrix B - typename _value, ///< Multiplicand value type (matrices A and B) - typename _accum ///< Accumulator value type (matrix C and scalars) -> -struct cublas_gemm -{ - // - // Type alias definitions - // - - static const gemm::tiling_strategy::kind_t TilingStrategy = _TilingStrategy; - static const math_operation_class_t math_op = _math_op; - static const matrix_transform_t::kind_t TransformA = _TransformA; - static const matrix_transform_t::kind_t TransformB = _TransformB; - - using value_t = _value; - using accum_t = _accum; - - /// Launches a GEMM - gemm::launch_configuration operator()( - cublasHandle_t cublas_handle, ///< CUBLAS 
handle - int m, - int n, - int k, - value_t *A, ///< A matrix - value_t *B, ///< B matrix - accum_t *C, ///< C matrix - accum_t alpha, ///< Scalar used for multiplicands - accum_t beta, ///< Scalar used for addend - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. - { - cublasStatus_t cublas_error = cublas_gemm_dispatch( - cublas_handle, - (cublasOperation_t) TransformA, - (cublasOperation_t) TransformB, - m, - n, - k, - alpha, - A, - B, - beta, - C, - stream, - debug_synchronous); - - cudaError_t error; - if (cublas_error != CUBLAS_STATUS_SUCCESS) - { - if (cublas_error == CUBLAS_STATUS_NOT_SUPPORTED) { - return gemm::launch_configuration(cudaErrorInvalidValue); - } - - error = cudaGetLastError(); - if (error == cudaSuccess) { - return gemm::launch_configuration(cudaErrorUnknown); - } - return error; - } - - // Check for failure to launch - if (CUDA_PERROR_DEBUG(error = cudaPeekAtLastError())) - return gemm::launch_configuration(error); - - // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CUDA_PERROR_DEBUG(error = cudaStreamSynchronize(stream)))) - return gemm::launch_configuration(error); - - return gemm::launch_configuration(error); - } -}; - - -} // namespace cutlass diff --git a/cutlass_test/cutlass_dispatch.h b/cutlass_test/cutlass_dispatch.h deleted file mode 100644 index 43bd7e67..00000000 --- a/cutlass_test/cutlass_dispatch.h +++ /dev/null @@ -1,261 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file Dispatch routines for CUTLASS GEMM kernels - */ - -// CUDA includes -#include - -// Cutlass GEMM API -#include -#include -#include - -// Test utilities -#include "util/type_conversion.h" - -namespace cutlass { - - - -/****************************************************************************** - * Cutlass dispatch entrypoints - ******************************************************************************/ - -// -// Compile-time overrides for alignment and ragged handling. -// - -// If zero, all feasible alignment options are supported. -#ifndef GEMM_ALIGNMENT -#define GEMM_ALIGNMENT 0 -#endif - -// If true, kernels are compiled with ragged handling enabled. -#ifndef GEMM_RAGGED - #define GEMM_RAGGED true -#endif - -// -// Dispatch logic given problem size specialization, math operation class, layout -// and type of operands, and epilogue operation. -// - -/** - * Cutlass GEMM dispatch - */ -template < - gemm::tiling_strategy::kind_t _TilingStrategy, ///< Tile-sizing classification category - math_operation_class_t _math_op, // Indicates - matrix_transform_t::kind_t _TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t _TransformB, ///< Transformation op for matrix B - typename _value, ///< Multiplicand value type (matrices A and B) - typename _accum, ///< Accumulator value type (matrix C and scalars) - typename _epilogue_op_t ///< Epilogue opeartion to update matrix C - = gemm::blas_scaled_epilogue<_accum, _accum, _accum> -> -struct cutlass_gemm_dispatch -{ - // - // Type alias definitions - // - - static const gemm::tiling_strategy::kind_t TilingStrategy = _TilingStrategy; - static const math_operation_class_t math_op = _math_op; - static const matrix_transform_t::kind_t TransformA = _TransformA; - static const matrix_transform_t::kind_t TransformB = _TransformB; - - using value_t = _value; - using accum_t = _accum; - 
using epilogue_op_t = _epilogue_op_t; - - // - // Methods - // - - /// Returns leading dimension for A matrix operand - int leading_dim_a(int m, int k) const - { - return (TransformA == matrix_transform_t::NonTranspose ? m : k); - } - - /// Returns leading dimension for B matrix operand - int leading_dim_b(int k, int n) const - { - return (TransformB == matrix_transform_t::NonTranspose ? k : n); - } - - /// Launches a GEMM - template - gemm::launch_configuration launch( - int m, - int n, - int k, - epilogue_op_t epilogue_op, - value_t *A, - value_t *B, - accum_t *C, - cudaStream_t stream = 0, - bool debug_synchronous = false) - { - return gemm::device_gemm< - TilingStrategy, - math_op, - TransformA, - operand_alignment, - TransformB, - operand_alignment, - value_t, - accum_t, - epilogue_op_t, - accumulator_alignment> - ( - m, - n, - k, - epilogue_op, - A, - B, - C, - stream, - debug_synchronous); - } - - /// Dispatches a CUTLASS GEMM - gemm::launch_configuration operator()( - cublasHandle_t handle, ///< CUBLAS handle - int m, ///< Rows of GEMM problem - int n, ///< Columns of GEMM problem - int k, ///< Inner dimension of GEMM problem - value_t *A, ///< A matrix - value_t *B, ///< B matrix - accum_t *C, ///< C matrix - accum_t alpha, ///< Scalar used for multiplicands - accum_t beta, ///< Scalar used for addend - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream - /// after every kernel launch to check for errors. 
- { - - // Forces kernel selection to choose specific alignment (in bytes) - int const force_operand_alignment = GEMM_ALIGNMENT; - - // Problem size must be multiple of the smallest vector load size - typedef value_t operand_load_t; - int const accumulator_alignment = sizeof(accum_t); - - int const lda = leading_dim_a(m, k); - int const ldb = leading_dim_b(k, n); - - epilogue_op_t epilogue(alpha, beta); - - // TODO: opportunity for metaprogramming loop - - // Prefer the largest granularity of vector load that is compatible with - // problem size and data alignment. - if ((!force_operand_alignment || force_operand_alignment == 16) && - !((sizeof(operand_load_t) * lda) % 16) && - !((sizeof(operand_load_t) * ldb) % 16)) - { - #if !(GEMM_ALIGNMENT) || (GEMM_ALIGNMENT == 16) - return launch<__NV_STD_MAX(16, sizeof(value_t)), accumulator_alignment>( - m, - n, - k, - epilogue, - A, - B, - C, - stream, - debug_synchronous); - #endif - } - else if ((!force_operand_alignment || force_operand_alignment == 8) && - !((sizeof(operand_load_t) * lda) % 8) && - !((sizeof(operand_load_t) * ldb) % 8)) - { - #if !(GEMM_ALIGNMENT) || (GEMM_ALIGNMENT == 8) - return launch<__NV_STD_MAX(8, sizeof(value_t)), accumulator_alignment>( - m, - n, - k, - epilogue, - A, - B, - C, - stream, - debug_synchronous); - #endif - } - else if ((!force_operand_alignment || force_operand_alignment == 4) && - !((sizeof(operand_load_t) * lda) % 4) && - !((sizeof(operand_load_t) * ldb) % 4)) - { - #if !(GEMM_ALIGNMENT) || (GEMM_ALIGNMENT == 4) - return launch<__NV_STD_MAX(4, sizeof(value_t)), accumulator_alignment>( - m, - n, - k, - epilogue, - A, - B, - C, - stream, - debug_synchronous); - #endif - } - else if ((!force_operand_alignment || force_operand_alignment == 2) && - !((sizeof(operand_load_t) * lda) % 2) && - !((sizeof(operand_load_t) * ldb) % 2)) - { - // 16-bit alignment only supported for HGEMM - #if defined(TEST_HGEMM) || defined(TEST_WGEMM) - #if !(GEMM_ALIGNMENT) || (GEMM_ALIGNMENT == 2) - return 
launch<__NV_STD_MAX(2, sizeof(value_t)), accumulator_alignment>( - m, - n, - k, - epilogue, - A, - B, - C, - stream, - debug_synchronous); - #endif - #endif - } - - return gemm::launch_configuration(cudaErrorInvalidValue); - } -}; - - -} // namespace cutlass diff --git a/cutlass_test/gemm.cu b/cutlass_test/gemm.cu deleted file mode 100644 index bdf29604..00000000 --- a/cutlass_test/gemm.cu +++ /dev/null @@ -1,572 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file gemm.cu - * GEMM test driver - * - */ - -#include -#include -#include -#include - -// CUBLAS GEMM API -#include - -// Set Cutlass debug macro to enable console printing of library errors -#define DEBUG - -#if defined(WMMA) -// Conditionally include WMMA headers (CUDA 9 Preview Feature) -#include -#endif - -// Cutlass GEMM API -#include -#include -#include - -// Test utilities -#include "util/command_line.h" -#include "util/half.h" -#include "util/matrix.h" -#include "util/timer.h" -#include "util/type_conversion.h" - -// Dispatch routines to CUBLAS and CUTLASS -#include "cublas_dispatch.h" -#include "cutlass_dispatch.h" - -/****************************************************************************** - * Globals, constants and typedefs - ******************************************************************************/ - -using namespace cutlass; - -/// CUBLAS handle -cublasHandle_t g_cublas_handle; - -/// The device-id of the current device -int g_device_id = -1; - -/// The number of timing iterations to invoke -int g_timing_iterations = -1; - -/// The number of randomly-sized problems to schmoo -int g_schmoo = 0; - - -/****************************************************************************** - * Number generation - ******************************************************************************/ - -/** - * Simple low-integer generator - */ -struct 
simple_gen -{ - std::default_random_engine generator; - std::uniform_int_distribution distribution; - - /// Constructor - simple_gen(int max) : distribution(max * -1, max) - {} - - /// Functor - int operator()() - { - return distribution(generator); - } -}; - - - - -/****************************************************************************** - * Test execution - ******************************************************************************/ - - -/** - * Compute C = (alpha * A * B) + (beta * C) - */ -template < - typename test_func_t, ///< Test function type - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t> ///< Accumulator value type (matrix C and scalars) -bool test( - int m, ///< Height of C in rows - int n, ///< Width of C in columns - int k, ///< Width (height) of A (B) - accum_t alpha, ///< Multiplicand scalar - accum_t beta) ///< Addend scalar -{ - cudaStream_t stream = 0; - - // - // Initialize matrices - // - - matrix A( - (TransformA == matrix_transform_t::NonTranspose) ? m : k, - (TransformA == matrix_transform_t::NonTranspose) ? k : m); - - matrix B( - (TransformB == matrix_transform_t::NonTranspose) ? k : n, - (TransformB == matrix_transform_t::NonTranspose) ? 
n : k); - - matrix C(m, n); - - // initialized matrices with small values precisely representable as integers - simple_gen a_gen(3); - simple_gen b_gen(5); - A.fill_random(a_gen); - B.fill_random(b_gen); - C.fill_ramp(0,0); - -// // Alternatively, initialize with procedural values to simplify debugging incorrect results -// A.fill_ramp(1,2); -// B.fill_ramp(1,1); - - // Sync to device - A.sync_device(); - B.sync_device(); - C.sync_device(); - - CUDA_PERROR(cudaPeekAtLastError()); - CUDA_PERROR(cudaDeviceSynchronize()); - - // - // Run test once with debug-synchronous enabled and check result - // - - if (!g_schmoo) printf("\n"); - - test_func_t test_func; - - C.fill_ramp(0, 0); - C.sync_device(); - - cudaError_t error = test_func( - g_cublas_handle, - m, - n, - k, - A.d_data(), - B.d_data(), - C.d_data(), - alpha, - beta, - stream, - !g_schmoo).result; - - bool not_applicable = (error == cudaErrorInvalidValue); - bool is_failed = false; - if (not_applicable) - { - printf(", NA"); - } - else - { - CUDA_PERROR(error); - - // Compute reference check if wont take too long on CPU - if ((!g_schmoo) && (m * n <= 1024 * 1024)) - { - matrix ref_C(m, n); - ref_C.fill_ramp(0, 0); - ref_C.gemm(TransformA, TransformB, alpha, A, B, beta); - C.sync_host(); - - is_failed = (C != ref_C); - - if (!g_schmoo) - { - if (is_failed) - { - printf("FAIL, "); - std::ofstream file_a("a.csv"); - A.write_matrix(file_a); - std::ofstream file_b("b.csv"); - B.write_matrix(file_b); - std::ofstream file_d("gemm-REF.csv"); - ref_C.write_matrix(file_d); - std::ofstream file_c("gemm-GPU.csv"); - C.write_matrix(file_c); - } - else - { - printf("PASS, "); - } - } - } - fflush(stdout); - - // - // Warmup and timing iterations - // - - if (g_timing_iterations > 0) - { - // Warmup for 1/100 of the timing iterations (minimum of 2) - for (int i = 0; i < __NV_STD_MAX(2, (g_timing_iterations + 99) / 100); ++i) - { - CUDA_PERROR(test_func( - g_cublas_handle, - m, - n, - k, - A.d_data(), - B.d_data(), - 
C.d_data(), - alpha, - beta, - stream, - false).result); - } - } - - // Conduct timing iterations - double elapsed_ms = 0; - gpu_timer timer; - timer.start(); - - for (int i = 0; i < g_timing_iterations; i++) - { - CUDA_PERROR(test_func( - g_cublas_handle, - m, - n, - k, - A.d_data(), - B.d_data(), - C.d_data(), - alpha, - beta, - stream, - false).result); - } - - timer.stop(); - elapsed_ms += timer.elapsed_millis(); - double avg_ms = elapsed_ms / g_timing_iterations; - - // Display performance - if (g_timing_iterations > 0) - { - int64_t num_flops = (2 * int64_t(m) * int64_t(n) * int64_t(k)) + (2 * int64_t(m) * int64_t(n)); - double gflops_per_sec = double(num_flops) / avg_ms / 1.0e6; - - if (g_schmoo) - { - if (is_failed) - printf("F"); - - printf(", %.3f", gflops_per_sec); - - // Sleep for a few milliseconds to cool - sleep_millis(10); - } - else - { - printf("Avg runtime: %.3f ms, total flops: %lld, GFLOP/s: %.2f\n", - avg_ms, - num_flops, - gflops_per_sec); - } - fflush(stdout); - } - } - - return is_failed; -} - -/** - * Compute C = (alpha * A * B) + (beta * C) - */ -template < - math_operation_class_t math_op, - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t> ///< Accumulator value type (matrix C and scalars) -bool test( - int m, ///< Height of C in rows - int n, ///< Width of C in columns - int k, ///< Width (height) of A (B) - accum_t alpha, ///< Multiplicand scalar - accum_t beta) ///< Addend scalar -{ - uint64_t flop_base = 1ull << 41; - int max_timing_iterations = 10000; - int min_timing_iterations = 10; - - bool test_error = false; - - // Scale the number of timing iterations with respect to problem size (if not specified on commandline) - if ((g_timing_iterations < 0) || g_schmoo) - { - uint64_t num_flops = (2 * uint64_t(m) * uint64_t(n) * uint64_t(k)) + (2 * 
uint64_t(m) * uint64_t(n)); - g_timing_iterations = (int) ((flop_base / sizeof(value_t)) / num_flops); - - g_timing_iterations = (int) __NV_STD_MIN(max_timing_iterations, g_timing_iterations); - g_timing_iterations = (int) __NV_STD_MAX(min_timing_iterations, g_timing_iterations); - } - - if (g_schmoo) - { - printf("%d, %d, %d, %c%c, %d, %d", - m, n, k, - (TransformA == matrix_transform_t::NonTranspose) ? 'n' : 't', - (TransformB == matrix_transform_t::NonTranspose) ? 'n' : 't', - m * n, - g_timing_iterations); - } - else - { - printf("\n------------------------------------------------------------\n"); - printf("%dx%dx%d, GEMM_%c%c, %d C elements, %d timing iterations\n", - m, n, k, - (TransformA == matrix_transform_t::NonTranspose) ? 'n' : 't', - (TransformB == matrix_transform_t::NonTranspose) ? 'n' : 't', - m * n, - g_timing_iterations); - } - fflush(stdout); - - // CUBLAS - test_error |= test< - cublas_gemm, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - // CUTLASS - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - return test_error; -} - - - - -/****************************************************************************** - * Main - 
******************************************************************************/ - - -/** - * Main - */ -int main(int argc, const char **argv) -{ - // - // Problem type (compiler-supplied so we don't compile everything) - // - - // Define value_t and accum_t (multiplicand and accumulator types, respectively) -#if defined(TEST_SGEMM) - typedef float value_t; - typedef float accum_t; - const math_operation_class_t math_op = math_operation_class_t::scalar; -#elif defined(TEST_DGEMM) - typedef double value_t; - typedef double accum_t; - const math_operation_class_t math_op = math_operation_class_t::scalar; -#elif defined(TEST_HGEMM) - typedef __half value_t; - typedef __half accum_t; - const math_operation_class_t math_op = math_operation_class_t::scalar; -#elif defined(TEST_IGEMM) - typedef int8_t value_t; - typedef int32_t accum_t; - const math_operation_class_t math_op = math_operation_class_t::scalar; -#elif defined(TEST_WGEMM) - typedef half value_t; - typedef float accum_t; - const math_operation_class_t math_op = math_operation_class_t::matrix; -#else - #error Unknown GEMM type requested. 
-#endif - - - // Define transpose constants -#ifdef TRANSPOSE_A - static const matrix_transform_t::kind_t TransformA = matrix_transform_t::Transpose; -#else - static const matrix_transform_t::kind_t TransformA = matrix_transform_t::NonTranspose; -#endif - -#ifdef TRANSPOSE_B - static const matrix_transform_t::kind_t TransformB = matrix_transform_t::Transpose; -#else - static const matrix_transform_t::kind_t TransformB = matrix_transform_t::NonTranspose; -#endif - - - // - // Commandline parsing - // - - // Initialize command line - command_line args(argc, argv); - - int m_factor = args.device_prop.multiProcessorCount * 128; - int m = round_nearest(4096, m_factor); - int k = 4096; - int n = 4096; - float alpha = 1.0; - float beta = 0.0; - - g_device_id = args.device_id; - args.get_cmd_line_argument("m", m); - args.get_cmd_line_argument("n", n); - args.get_cmd_line_argument("k", k); - args.get_cmd_line_argument("i", g_timing_iterations); - args.get_cmd_line_argument("alpha", alpha); - args.get_cmd_line_argument("beta", beta); - args.get_cmd_line_argument("schmoo", g_schmoo); - - // Print usage - if (args.check_cmd_line_flag("help")) - { - printf("%s " - "[--help] " - "[--i=] " - "[--device=] " - "[--alpha= --beta=] " - "[--schmoo= || --m= --n= --k=]" - "\n", argv[0]); - exit(0); - } - - // Initialize cuBLAS - if (cublasCreate(&g_cublas_handle) != CUBLAS_STATUS_SUCCESS) - { - fprintf(stderr, "cublasCreate() failed\n"); - exit(1); - } - - bool test_error = false; - - if (g_schmoo) - { - // Run a schmoo of problem sizes - printf("M, N, K, transpose, total_flops, timing_iterations, sol_flop/s, cublas_sol, cutlass_small_sol, cutlass_med_sol, cutlass_large_sol, cutlass_tall_sol, cutlass_wide_sol, cutlass_huge_sol\n"); - - // Generate power-law distribution from [32, 16384) - std::mt19937 gen(0); - std::uniform_real_distribution dis(5, 14); - for (int i = 0; i < g_schmoo; ++i) - { - int m = int(pow(float(2), dis(gen))); - int n = int(pow(float(2), dis(gen))); - int k = 
int(pow(float(2), dis(gen))); - - // Round m and n to nearest multiple of 32 if < 128, otherwise to the nearest 128 - m = (m < 128) ? - round_nearest(m, 32) : - round_nearest(m, 128); - n = (n < 128) ? - round_nearest(n, 32) : - round_nearest(n, 128); - - // Round k to the nearest 16 - k = (sizeof(value_t) == 1) ? - round_nearest(k, 32) : - round_nearest(k, 16); - - test_error |= test( - m, n, k, - from_float(alpha), - from_float(beta)); - - printf("\n"); fflush(stdout); - } - } - else - { - // Test a single GEMM problem size - test_error |= test( - m, - n, - k, - from_float(alpha), - from_float(beta)); - } - - // Cleanup - cublasDestroy(g_cublas_handle); - - return test_error; -} - diff --git a/cutlass_test/util/command_line.h b/cutlass_test/util/command_line.h deleted file mode 100644 index 9bdc99d9..00000000 --- a/cutlass_test/util/command_line.h +++ /dev/null @@ -1,320 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - - -#pragma once - -/** - * \file - * Utility for parsing command line arguments - */ - -#include -#include -#include -#include -#include - -#include -#include - - -namespace cutlass { - -/****************************************************************************** - * command_line - ******************************************************************************/ - -/** - * Utility for parsing command line arguments - */ -struct command_line -{ - - std::vector keys; - std::vector values; - std::vector args; - int device_id; - cudaDeviceProp device_prop; - float device_giga_bandwidth; - size_t device_free_physmem; - size_t device_total_physmem; - - /** - * Constructor - */ - command_line(int argc, const char **argv, int device_id = -1) : - keys(10), - values(10), - device_id(device_id) - { - using namespace std; - - for (int i = 1; i < argc; i++) - { - string arg = argv[i]; - - if ((arg[0] != '-') || (arg[1] != '-')) - { - args.push_back(arg); - continue; - } - - string::size_type pos; - string key, val; - if ((pos = arg.find('=')) == string::npos) { - key = string(arg, 2, arg.length() - 2); - val = ""; - } else 
{ - key = string(arg, 2, pos - 2); - val = string(arg, pos + 1, arg.length() - 1); - } - - keys.push_back(key); - values.push_back(val); - } - - // Initialize device - CUDA_PERROR_EXIT(device_init()); - } - - - /** - * Checks whether a flag "--" is present in the commandline - */ - bool check_cmd_line_flag(const char* arg_name) - { - using namespace std; - - for (int i = 0; i < int(keys.size()); ++i) - { - if (keys[i] == string(arg_name)) - return true; - } - return false; - } - - - /** - * Returns number of naked (non-flag and non-key-value) commandline parameters - */ - template - int num_naked_args() - { - return args.size(); - } - - - /** - * Returns the commandline parameter for a given index (not including flags) - */ - template - void get_cmd_line_argument(int index, value_t &val) - { - using namespace std; - if (index < args.size()) { - istringstream str_stream(args[index]); - str_stream >> val; - } - } - - /** - * Returns the value specified for a given commandline parameter --= - */ - template - void get_cmd_line_argument(const char *arg_name, value_t &val) - { - using namespace std; - - for (int i = 0; i < int(keys.size()); ++i) - { - if (keys[i] == string(arg_name)) - { - istringstream str_stream(values[i]); - str_stream >> val; - } - } - } - - - /** - * Returns the values specified for a given commandline parameter --=,* - */ - template - void get_cmd_line_arguments( - const char *arg_name, - std::vector &vals, - char sep = ',') - { - using namespace std; - - if (check_cmd_line_flag(arg_name)) - { - // Clear any default values - vals.clear(); - - // Recover from multi-value string - for (int i = 0; i < keys.size(); ++i) - { - if (keys[i] == string(arg_name)) - { - string val_string(values[i]); - istringstream str_stream(val_string); - string::size_type old_pos = 0; - string::size_type new_pos = 0; - - // Iterate -delimited values - value_t val; - while ((new_pos = val_string.find(sep, old_pos)) != string::npos) - { - if (new_pos != old_pos) - { - 
str_stream.width(new_pos - old_pos); - str_stream >> val; - vals.push_back(val); - } - - // skip over delimiter - str_stream.ignore(1); - old_pos = new_pos + 1; - } - - // Read last value - str_stream >> val; - vals.push_back(val); - } - } - } - } - - - /** - * The number of pairs parsed - */ - int parsed_argc() - { - return (int) keys.size(); - } - - /** - * Initialize device - */ - cudaError_t device_init() - { - cudaError_t error = cudaSuccess; - - do - { - int deviceCount; - if (CUDA_PERROR(error = cudaGetDeviceCount(&deviceCount))) break; - - if (deviceCount == 0) { - fprintf(stderr, "No devices supporting CUDA.\n"); - exit(1); - } - if (device_id < 0) - { - get_cmd_line_argument("device", device_id); - } - if ((device_id > deviceCount - 1) || (device_id < 0)) - { - device_id = 0; - } - - if (CUDA_PERROR(error = cudaSetDevice(device_id))) break; - - if (CUDA_PERROR(error = cudaMemGetInfo(&device_free_physmem, &device_total_physmem))) break; - - if (CUDA_PERROR(error = cudaGetDeviceProperties(&device_prop, device_id))) break; - - if (device_prop.major < 1) { - fprintf(stderr, "Device does not support CUDA.\n"); - exit(1); - } - - device_giga_bandwidth = float(device_prop.memoryBusWidth) * device_prop.memoryClockRate * 2 / 8 / 1000 / 1000; - - } while (0); - - return error; - } - - - //------------------------------------------------------------------------- - // Utility functions - //------------------------------------------------------------------------- - - /// Tokenizes a comma-delimited list of string pairs delimited by ':' - static void tokenize( - std::vector > &tokens, - std::string const &str, - char delim = ',', - char sep = ':') - { - // Home-built to avoid Boost dependency - size_t s_idx = 0; - size_t d_idx = std::string::npos; - while (s_idx < str.size()) - { - d_idx = str.find_first_of(delim, s_idx); - - size_t end_idx = (d_idx != std::string::npos ? 
d_idx : str.size()); - size_t sep_idx = str.find_first_of(sep, s_idx); - size_t offset = 1; - if (sep_idx == std::string::npos || sep_idx >= end_idx) - { - sep_idx = end_idx; - offset = 0; - } - - std::pair item( - str.substr(s_idx, sep_idx - s_idx), - str.substr(sep_idx + offset, end_idx - sep_idx - offset)); - - tokens.push_back(item); - s_idx = end_idx + 1; - } - } - - /// Tokenizes a comma-delimited list of string pairs delimited by ':' - static void tokenize( - std::vector &tokens, - std::string const &str, - char delim = ',', - char sep = ':') - { - std::vector > token_pairs; - tokenize(token_pairs, str, delim, sep); - for (auto const &tok : token_pairs) - { - tokens.push_back(tok.first); - } - } -}; - - -} // namespace cutlass diff --git a/cutlass_test/util/exceptions.h b/cutlass_test/util/exceptions.h deleted file mode 100644 index 7f12e69c..00000000 --- a/cutlass_test/util/exceptions.h +++ /dev/null @@ -1,91 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief C++ exception semantics for CUDA error codes - */ - -#include -#include - - -namespace cutlass { - - -/// C++ exception wrapper for CUDA \p cudaError_t -class cuda_exception : public std::exception -{ -public: - - /// Constructor - cuda_exception( - const char *msg = "", - cudaError_t err = cudaErrorUnknown) - : - msg(msg), err(err) - {} - - /// Returns the explanatory string - const char *what() const noexcept - { - return msg; - } - - /// Returns the underlying CUDA \p cudaError_t - cudaError_t cudaError() const - { - return err; - } - - -protected: - - /// Explanatory string - const char *msg; - - /// Underlying CUDA \p cudaError_t - cudaError_t err; -}; - - -/// Writes a cudaError_t to an output stream -inline std::ostream & operator<<(std::ostream &out, cudaError_t result) -{ - return out << cudaGetErrorString(result); -} - -/// Writes a cuda_exception instance to an output stream -inline std::ostream & operator<<(std::ostream &out, cuda_exception const &e) -{ - return out << e.what() << ": " << e.cudaError(); -} - - -} // namespace cutlass diff --git 
a/cutlass_test/util/half.h b/cutlass_test/util/half.h deleted file mode 100644 index cef2ccec..00000000 --- a/cutlass_test/util/half.h +++ /dev/null @@ -1,231 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ -#pragma once - -/** - * \file - * Utilities for interacting with the opaque CUDA __half type - */ - -#include -#include -#include - -namespace cutlass { - - -/****************************************************************************** - * half_t - ******************************************************************************/ - -/** - * Host-based fp16 data type compatible and convertible with __half - */ -struct half_t -{ - uint16_t __x; - - /// Constructor from __half - half_t(const __half &other) - { - __x = reinterpret_cast(other); - } - - /// Constructor from integer - half_t(int a) - { - *this = half_t(float(a)); - } - - - /// Constructor from float - half_t(float a) - { - uint32_t ia = *reinterpret_cast(&a); - uint16_t ir; - - ir = (ia >> 16) & 0x8000; - - if ((ia & 0x7f800000) == 0x7f800000) - { - if ((ia & 0x7fffffff) == 0x7f800000) - { - ir |= 0x7c00; /* infinity */ - } - else - { - ir = 0x7fff; /* canonical NaN */ - } - } - else if ((ia & 0x7f800000) >= 0x33000000) - { - int32_t shift = (int32_t) ((ia >> 23) & 0xff) - 127; - if (shift > 15) - { - ir |= 0x7c00; /* infinity */ - } - else - { - ia = (ia & 0x007fffff) | 0x00800000; /* extract mantissa */ - if (shift < -14) - { /* denormal */ - ir |= ia >> (-1 - shift); - ia = ia << (32 - (-1 - shift)); - } - else - { /* normal */ - ir |= ia >> (24 - 11); - ia = ia << (32 - (24 - 11)); - ir = ir + ((14 + shift) << 10); - } - /* IEEE-754 round to nearest of even */ - if ((ia > 0x80000000) || ((ia == 0x80000000) && (ir & 1))) - { - ir++; - } - } - } - - this->__x = ir; - } - - /// Cast to __half - operator __half() const - { - return reinterpret_cast(__x); - } - - /// Cast to float - operator float() const - { - int sign = ((this->__x >> 15) & 1); - int exp = ((this->__x >> 10) & 0x1f); - int mantissa = (this->__x & 0x3ff); - uint32_t f = 0; - - if (exp > 0 && exp < 31) - { - // normal - exp += 112; - f = (sign << 31) | 
(exp << 23) | (mantissa << 13); - } - else if (exp == 0) - { - if (mantissa) - { - // subnormal - exp += 113; - while ((mantissa & (1 << 10)) == 0) - { - mantissa <<= 1; - exp--; - } - mantissa &= 0x3ff; - f = (sign << 31) | (exp << 23) | (mantissa << 13); - } - else - { - // zero - f = 0; - } - } - else if (exp == 31) - { - if (mantissa) - { - f = 0x7fffffff; // not a number - } - else - { - f = (0xff << 23) | (sign << 31); // inf - } - } - return *reinterpret_cast(&f); - } - - - /// Get raw storage - uint16_t raw() - { - return this->__x; - } - - /// Assignment by sum - bool operator ==(const half_t &other) - { - return (this->__x == other.__x); - } - - /// Increment - half_t& operator +=(const half_t &rhs) - { - *this = half_t(float(*this) + float(rhs)); - return *this; - } - - /// Decrement - half_t& operator -=(const half_t &rhs) - { - *this = half_t(float(*this) - float(rhs)); - return *this; - } - - /// Multiply - half_t operator*(const half_t &other) - { - return half_t(float(*this) * float(other)); - } - - /// Multiply - half_t operator+(const half_t &other) - { - return half_t(float(*this) + float(other)); - } - -}; - - -/****************************************************************************** - * I/O stream overloads - ******************************************************************************/ - -/// Insert formatted \p half_t into the output stream -std::ostream& operator<<(std::ostream &out, const half_t &x) -{ - out << (float)x; - return out; -} - - -/// Insert formatted \p __half into the output stream -std::ostream& operator<<(std::ostream &out, const __half &x) -{ - return out << half_t(x); -} - - -} // namespace cutlass diff --git a/cutlass_test/util/matrix.h b/cutlass_test/util/matrix.h deleted file mode 100644 index 7ae080c8..00000000 --- a/cutlass_test/util/matrix.h +++ /dev/null @@ -1,503 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. 
All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Matrix data structure providing basic CPU-based algorithms and - * operations that can be cloned and synchronized in GPU device memory - */ - -#include -#include - -#include -#include "../cutlass/util/matrix_transform.h" -#include "half.h" - - -namespace cutlass { - -/** - * \brief Matrix data structure providing basic CPU-based algorithms and - * operations that be synchronized with a GPU-based replica - */ -template -struct matrix -{ - // Host value type (must be convertible to/from value_t) - typedef typename nv_std::conditional< - (nv_std::is_same::value), // If (value_t == __half) ... - half_t, // ... use half_t internally for host storage, else... - value_t>::type // ... use value_t directly - host_value_t; - - - //----------------------------------------------------------------------------- - // Data members - //----------------------------------------------------------------------------- - -private: - - /// M dimension (height in rows) - int _m; - - /// N dimension (width in columns) - int _n; - - /// Data array on host - std::vector _h_data; - - /// Clone of data array on GPU device - value_t *_d_data; - - /// GPU Device identifier that clone synchronizes with - int _device_id; - -public: - - //----------------------------------------------------------------------------- - // Lifetime and synchronization - //----------------------------------------------------------------------------- - - /** - * Constructor: zero-initializes the matrix. 
- */ - matrix( - int m, ///< Height of the matrix in rows - int n) ///< Width of the matrix in columns - : - _m(m), - _n(n), - _d_data(NULL), - _device_id(0) - { - _h_data.resize(_m * _n, 0); - CUDA_PERROR_EXIT(cudaMalloc((void ** )&_d_data, sizeof(value_t) * _m * _n)); - CUDA_PERROR_EXIT(cudaGetDevice(&_device_id)); - } - - /// Destructor - ~matrix() - { - if (_d_data) - { - CUDA_PERROR_EXIT(cudaFree(_d_data)); - } - } - - /** - * Synchronize the GPU-based replica with the current host-based matrix data - */ - void sync_device() - { - size_t bytes = _m * _n * sizeof(value_t); - CUDA_PERROR_EXIT(cudaMemcpy(_d_data, &_h_data[0], bytes, cudaMemcpyHostToDevice)); - } - - - /** - * Synchronize the host-based replica with the current GPU-based matrix data - */ - void sync_host() - { - size_t bytes = _m * _n * sizeof(value_t); - CUDA_PERROR_EXIT(cudaMemcpy(&_h_data[0], _d_data, bytes, cudaMemcpyDeviceToHost)); - } - - - //----------------------------------------------------------------------------- - // Inspectors - //----------------------------------------------------------------------------- - - /** - * Return the height of the matrix, subject to the optional \p transpose_op - */ - int height(matrix_transform_t transpose_op = matrix_transform_t::NonTranspose) const - { - switch (transpose_op) - { - case matrix_transform_t::NonTranspose : return _m; - case matrix_transform_t::Transpose : return _n; - default: return -1; - } - } - - - /** - * Return the width of the matrix, subject to the optional \p transpose_op - */ - int width(matrix_transform_t transpose_op = matrix_transform_t::NonTranspose) const - { - switch (transpose_op) - { - case matrix_transform_t::NonTranspose : return _n; - case matrix_transform_t::Transpose : return _m; - default: return -1; - } - } - - - /** - * Return item at (x, y) coordinate of matrix, subject to the optional \p transform op - */ - host_value_t get( - int x, - int y, - matrix_transform_t transpose_op = 
matrix_transform_t::NonTranspose) const - { - switch (transpose_op) - { - case matrix_transform_t::NonTranspose : return _h_data[y + (x * _m)]; - case matrix_transform_t::Transpose : return _h_data[x + (y * _m)]; - default: return 0; - } - } - - - /** - * Return the distance (in items) within memory between elements of two - * consecutive columns which have the same row index, subject to the optional \p transform op - */ - int leading_dim(matrix_transform_t transpose_op = matrix_transform_t::NonTranspose) const - { - switch (transpose_op) - { - case matrix_transform_t::NonTranspose : return _m; - case matrix_transform_t::Transpose : return _n; - default: return 0; - } - } - - /** - * Get host data pointer - */ - value_t* h_data() - { - return _h_data.data(); - } - - - /** - * Get host data pointer - */ - value_t const* h_data() const - { - return _h_data.data(); - } - - /** - * Get device data pointer - */ - value_t const* d_data() const - { - return _d_data; - } - - /** - * Get device data pointer - */ - value_t * d_data() - { - return _d_data; - } - - //----------------------------------------------------------------------------- - // Initialization - //----------------------------------------------------------------------------- - - /** - * Initialize matrix values with a 2D "ramp" defined as - * values(x, y) = (y * rs) + (x * cs) - */ - void fill_ramp( - host_value_t rs, - host_value_t cs) - { - for (int x = 0; x < _n; x++) - { - for (int y = 0; y < _m; y++) - { - _h_data[y + (x * _m)] = host_value_t((y * rs) + (x * cs)); - } - } - } - - - /** - * Initialize matrix values such that all the elements of the principal diagonal - * are ones and all other elements are zeros - */ - void fill_identity() - { - for (int j = 0; j < _n; j++) - { - for (int i = 0; i < _m; i++) - { - _h_data[i + j * _m] = host_value_t(i == j ? 1 : 0); - } - } - } - - - /** - * Initialize matrix values using the random number \p generator. 
The - * \p generator reference is assumed to be a nullary functor that returns - * values convertible to the matrix \p value_t. - */ - template - void fill_random(T & generator) - { - for (int j = 0; j < _n; j++) - { - for (int i = 0; i < _m; i++) - { - _h_data[i + j * _m] = (value_t) generator(); - } - } - } - - - /** - * Element-wise matrix addition - */ - matrix & operator+=(matrix const &mat) - { - for (int j = 0; j < _n; j++) - { - for (int i = 0; i < _m; i++) - { - _h_data[i + j * _m] += mat._h_data[i + j * _m]; - } - } - return *this; - } - - /** - * Element-wise matrix subtraction - */ - matrix & operator-=(matrix const &mat) - { - for (int j = 0; j < _n; j++) - { - for (int i = 0; i < _m; i++) - { - _h_data[i + j * _m] -= mat._h_data[i + j * _m]; - } - } - return *this; - } - - //----------------------------------------------------------------------------- - // Output - //----------------------------------------------------------------------------- - - /** - * Prints matrix in CSV to output stream - */ - template - std::ostream & write_matrix(std::ostream &out, _hv_t) - { - for (int i = 0; i < _m; i++) - { - for (int j = 0; j < _n; j++) - { - out << (j ? "," : "") << _h_data[i + j * _m]; - } - out << "\n"; - } - return out; - } - - - /** - * Prints matrix in CSV to output stream - */ - std::ostream & write_matrix(std::ostream &out, int8_t) - { - for (int i = 0; i < _m; i++) - { - for (int j = 0; j < _n; j++) - { - out << (j ? 
"," : "") << int32_t(_h_data[i + j * _m]); - } - out << "\n"; - } - return out; - } - - - /** - * Prints matrix in CSV to output stream - */ - std::ostream & write_matrix(std::ostream &out) - { - return write_matrix(out, _h_data[0]); - } - - - //----------------------------------------------------------------------------- - // Floating point "almost-equal" utilities - //----------------------------------------------------------------------------- - - static bool almost_equal_ulps(half_t a, half_t b, int max_ulps) - { - if (a == b) - return true; - - int32_t int_diff = abs(a.raw() - b.raw()); - if (int_diff <= max_ulps) - return true; - return false; - } - - - static bool almost_equal_ulps(float a, float b, int max_ulps) - { - if (a == b) - return true; - int32_t int_diff = abs(*(int32_t*)&a - *(int32_t*)&b); - if (int_diff <= max_ulps) - return true; - return false; - } - - - static bool almost_equal_ulps(double a, double b, int max_ulps) - { - if (a == b) - return true; - int64_t int_diff = abs(*(int64_t*)&a - *(int64_t*)&b); - if (int_diff <= max_ulps) - return true; - return false; - } - - static bool almost_equal_ulps(int32_t a, int32_t b, int max_ulps) - { - return (a == b); - } - - - //----------------------------------------------------------------------------- - // matrix operations - //----------------------------------------------------------------------------- - - - /** - * Returns matrix equality - */ - bool operator==(const matrix &mat) const - { - int max_ulps = 30; - - if (_m != mat._m || _n != mat._n) - { - fprintf(stderr, "Error: dimension mismatch during matrix comparison.\n"); exit(1); - } - - for (int j = 0; j < _n; j++) - { - for (int i = 0; i < _m; i++) - { - if (!almost_equal_ulps(_h_data[i + j * _m], mat._h_data[i + j * _m], max_ulps)) - { - return false; - } - } - } - return true; - } - - - /** - * Returns matrix inequality - */ - bool operator!=(const matrix &mat) const - { - return !(*this == mat); - } - - - /** - * Computes this = (alpha 
* op(A) * op(B)) + (beta * this), specialized for gemm_nn - */ - template - void gemm( - matrix_transform_t transform_a, - matrix_transform_t transform_b, - host_value_t alpha, - const matrix &A, - const matrix &B, - host_value_t beta) - { - // Sanity check dimensions - if ((_m != A.height(transform_a)) || - (_n != B.width(transform_b)) || - (A.width(transform_a) != B.height(transform_b))) - { - fprintf(stderr, "Error: dimension mismatch during gemm.\n"); - exit(1); - } - - int M = A.height(transform_a); - int K = A.width(transform_a); - int N = B.width(transform_b); - - // Even the host-side implementation utilizes a blocking structure to improve - // verification performance - int DimBlockM = (M % 16 == 0) ? 16 : 1; - int DimBlockN = (N % 16 == 0) ? 16 : 1; - - for (int i = 0; i < M; i += DimBlockM) - { - for (int j = 0; j < N; j += DimBlockN) - { - for (int block_y = 0; block_y < DimBlockM; block_y++) - { - for (int block_x = 0; block_x < DimBlockN; block_x++) - { - int y = i + block_y; - int x = j + block_x; - - host_value_t accum(0); - for (int k = 0; k < K; k++) - { - accum += host_value_t(A.get(k, y, transform_a)) * host_value_t(B.get(x, k, transform_b)); - } - - _h_data[y + x * M] = (alpha * accum) + (beta * _h_data[y + x * M]); - } - } - } - } - } -}; - - -} // namespace cutlass diff --git a/cutlass_test/util/timer.h b/cutlass_test/util/timer.h deleted file mode 100644 index b13db976..00000000 --- a/cutlass_test/util/timer.h +++ /dev/null @@ -1,107 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * GPU kernel timer - */ - -#include - -#include - -namespace cutlass { - - -/****************************************************************************** - * gpu_timer - ******************************************************************************/ - -/** - * GPU event-based timer - */ -struct gpu_timer -{ - cudaEvent_t _start; - cudaEvent_t _stop; - - gpu_timer() - { - CUDA_PERROR_EXIT(cudaEventCreate(&_start)); - CUDA_PERROR_EXIT(cudaEventCreate(&_stop)); - } - - ~gpu_timer() - { - CUDA_PERROR_EXIT(cudaEventDestroy(_start)); - CUDA_PERROR_EXIT(cudaEventDestroy(_stop)); - } - - void start() - { - CUDA_PERROR_EXIT(cudaEventRecord(_start, 0)); - } - - void stop() - { - CUDA_PERROR_EXIT(cudaEventRecord(_stop, 0)); - } - - float elapsed_millis() - { - float elapsed = 0.0; - CUDA_PERROR_EXIT(cudaEventSynchronize(_stop)); - CUDA_PERROR_EXIT(cudaEventElapsedTime(&elapsed, _start, _stop)); - return elapsed; - } -}; - - -/****************************************************************************** - * sleep_millis - ******************************************************************************/ - -#ifdef _WIN32 - #include - - void sleep_millis(unsigned milliseconds) - { - Sleep(milliseconds); - } -#else - #include - - void sleep_millis(unsigned milliseconds) - { - usleep(milliseconds * 1000); // takes microseconds - } -#endif - - -} // namespace cutlass diff --git a/cutlass_test/util/type_conversion.h b/cutlass_test/util/type_conversion.h deleted file mode 100644 index fd55ff65..00000000 --- a/cutlass_test/util/type_conversion.h +++ /dev/null @@ -1,163 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Utilities for converting between types and assessing traits - */ - -#include "half.h" - -namespace cutlass { - -/****************************************************************************** - * Float conversion utilities - ******************************************************************************/ - -/// Convert float to value type -template -value_t from_float(float val) -{ - return value_t(val); -} - -/// Convert float to value type (__half specialization) -template <> -__half from_float<__half>(float val) -{ - return half_t(val); -} - - -/****************************************************************************** - * Type conversion utilities - ******************************************************************************/ - -/// Member \p type is defined as the signed integer type having the same size as \p T -template -struct integer_alias; - -template <> -struct integer_alias { - using type = int8_t; -}; - -template <> -struct integer_alias { - using type = int16_t; -}; - -template <> -struct integer_alias<__half> { - using type = int16_t; -}; - -template <> -struct integer_alias { - using type = int32_t; -}; - -template <> -struct integer_alias { - using type = int32_t; -}; - -template <> -struct integer_alias { - using type = int64_t; -}; - - - -/****************************************************************************** - * Type-info utilities - ******************************************************************************/ - -/// Returns a string to prefix 'gemm' to construct CUBLAS-like kernel names -template char const *to_prefix_string(); - -template <> char const *to_prefix_string() { - return "H"; -} - -template <> char const *to_prefix_string() { - return "H"; -} - -template <> char const *to_prefix_string() { - return "S"; -} - -template <> char const *to_prefix_string() { - return "WmmaH"; -} - -template <> 
char const *to_prefix_string() { - return "WmmaS"; -} - -template <> char const *to_prefix_string() { - return "D"; -} - -template <> char const *to_prefix_string() { - return "I"; -} - - -/****************************************************************************** - * Maps value_t to the minimum vector size used to load operand - ******************************************************************************/ - -template -struct operand_load_type; - -template <> -struct operand_load_type { using type = int32_t; }; - -template -struct operand_load_type { using type = T; }; - - -/****************************************************************************** - * Minimum alignment requirement, if any, determined from value_t. - ******************************************************************************/ - -template -struct gemm_alignment_requirement; - -template <> -struct gemm_alignment_requirement { static const int value = 4; }; - -template -struct gemm_alignment_requirement { static const int value = 0; }; - - - -} // namespace cutlass diff --git a/docs/generated-html/annotated.html b/docs/generated-html/annotated.html new file mode 100644 index 00000000..e6c405d5 --- /dev/null +++ b/docs/generated-html/annotated.html @@ -0,0 +1,378 @@ + + + + + + + +Cutlass: Class List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Class List
+
+
+
Here are the classes, structs, unions and interfaces with brief descriptions:
+
[detail level 1234]
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Ncutlass
 Ngemm
 Nplatform
 CAlignedStruct
 CComputeOffsetFromShapeCompute the offset for the given coordinates in a cube
 CComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
 CComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >Compute the offset for the given coordinates in a cube with a depth of 1
 CComputeOffsetFromStridesCompute the offset for the given coordinates in a cube
 CComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
 CComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >Compute the offset for the given coordinates in a cube with a depth of 1
 CComputeThreadOffsetFromStridesDecompose threadId.x into coordinate of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_
 CComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >Specialization for D=1 and C=1
 CComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >Specialization for D=1
 CConstPredicateTileAdapterAdapter to enable random access to predicates via logical coordinate within a tile
 CConvert
 CConvert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >
 CCoordStatically-sized array specifying Coords within a tensor
 CCopy
 Cdivide_assert
 CExtentReturns the extent of a scalar or vector
 CExtent< Vector< T, Lanes > >Returns the number of lanes of a vector if need be
 CExtent< Vector< T, Lanes > const >Returns the number of lanes of a vector if need be
 CFragmentA template defining Fragment Concept
 CFragmentConstIterator
 CFragmentIteratorA template defining Fragment Iterator Concept
 CFragmentLoad
 CFragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CFragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CFragmentStore
 CFragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CFragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CGemmOperandGemm operand - D = A * B + C
 CIdentityDescribes identity elements
 Cis_pow2
 CIteratorAdvanceSpecifies dimension in which post-increment accesses advance
 CIteratorFragmentSpecifies whether iterator storage fragment consists of Scalar values or WMMA matrix
 CLoad
 CLoad< double, 2, Memory_, true, 16 >
 CLoad< Scalar_, Lanes_, Memory_, true, 16 >
 CLoad< Scalar_, Lanes_, Memory_, true, 4 >
 CLoad< Scalar_, Lanes_, Memory_, true, 8 >
 Clog2_down
 Clog2_down< N, 1, Count >
 Clog2_up
 Clog2_up< N, 1, Count >
 CMatrixLayoutDescribes layouts of matrices
 CMemorySpaceEnum to specify which memory space data resides in
 CPredicateTileAdapterAdapter to enable random access to predicates via logical coordinate within a tile
 CPredicateVectorStatically sized array of bits implementing
 CReshapeTile
 CReshapeTile< Tile_, kAccessSize_, true >
 CShapeA Shape implementing Layout Concept describing the dimensions of a cube
 CShapeAdd
 CShapeCountCompute derived counts of a Layout Concept based class
 CShapeDiv
 CShapeMax
 CShapeMin
 CShapeMul
 CShapeScale
 CShapeStrides
 CShapeSub
 Csqrt_est
 CStorageType
 CStorageType< 1 >
 CStorageType< 2 >
 CStorageType< 4 >
 CStore
 CStore< double, 2, Memory_, true, 16 >
 CStore< Scalar_, Lanes_, Memory_, true, 16 >
 CStore< Scalar_, Lanes_, Memory_, true, 4 >
 CStore< Scalar_, Lanes_, Memory_, true, 8 >
 CTensorRefStructure modeling a pointer and stride into a tensor
 CTensorViewHost-side reference implementation of tensor operations
 CTiledThreadOffsetBasic thread offset function computed from a thread shape
 CTileIteratorBaseIterator for accessing a stripmined tile in memory
 CTileLoadIteratorAn iterator implementing Tile Load Iterator Concept for loading a tile from memory
 CTileStoreIteratorAn iterator implementing Tile Store Iterator Concept for storing a tile to memory
 CTileTraitsA template defining Tile Traits Concept
 CTileTraitsContiguousMajor
 CTileTraitsStandardChooses 'best' shape to enable warp raking along contiguous dimension if possible
 CTileTraitsStrideMajor
 CTileTraitsWarpRakeTiling in which warps rake across the contiguous dimension
 CTrivialPredicateTileAdapterAlways returns true predicate
 CVector
 CVector< half, kLanes_ >
 CVectorize
 CVectorize< Element_, 1 >
 CVectorTraitsTraits describing properties of vectors and scalar-as-vectors
 CVectorTraits< Vector< T, Lanes > >Partial specialization for actual cutlass::Vector
 CVectorTraits< Vector< T, Lanes > const >Partial specialization for actual cutlass::Vector
+
+
+ + + + diff --git a/docs/generated-html/bc_s.png b/docs/generated-html/bc_s.png new file mode 100644 index 00000000..c3e55261 Binary files /dev/null and b/docs/generated-html/bc_s.png differ diff --git a/docs/generated-html/bdwn.png b/docs/generated-html/bdwn.png new file mode 100644 index 00000000..cb6ebb16 Binary files /dev/null and b/docs/generated-html/bdwn.png differ diff --git a/docs/generated-html/classcutlass_1_1PredicateVector_1_1ConstIterator-members.html b/docs/generated-html/classcutlass_1_1PredicateVector_1_1ConstIterator-members.html new file mode 100644 index 00000000..860cd05c --- /dev/null +++ b/docs/generated-html/classcutlass_1_1PredicateVector_1_1ConstIterator-members.html @@ -0,0 +1,99 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator Member List
+
+ + + + + diff --git a/docs/generated-html/classcutlass_1_1PredicateVector_1_1ConstIterator.html b/docs/generated-html/classcutlass_1_1PredicateVector_1_1ConstIterator.html new file mode 100644 index 00000000..1fbdc759 --- /dev/null +++ b/docs/generated-html/classcutlass_1_1PredicateVector_1_1ConstIterator.html @@ -0,0 +1,389 @@ + + + + + + + +Cutlass: cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator Class Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator Class Reference
+
+
+ +

A const iterator implementing Predicate Iterator Concept enabling sequential read-only access to predicates. +

+ +

#include <predicate_vector.h>

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE ConstIterator (ConstIterator const &it)
 Copy constructor. More...
 
CUTLASS_HOST_DEVICE ConstIterator (PredicateVector const &_vec, int _start=0)
 
CUTLASS_HOST_DEVICE ConstIterator & operator++ ()
 Pre-increment. More...
 
CUTLASS_HOST_DEVICE ConstIterator & operator-- ()
 Pre-decrement. More...
 
CUTLASS_HOST_DEVICE ConstIterator operator++ (int)
 Post-increment. More...
 
CUTLASS_HOST_DEVICE ConstIterator operator-- (int)
 Post-decrement. More...
 
CUTLASS_HOST_DEVICE bool operator== (ConstIterator const &it) const
 Returns true if iterators point to the same bit. More...
 
CUTLASS_HOST_DEVICE bool operator!= (ConstIterator const &it) const
 Returns false if iterators point to the same bit. More...
 
CUTLASS_HOST_DEVICE bool operator* () const
 Dereferences iterator. More...
 
+

Constructor & Destructor Documentation

+ +

◆ ConstIterator() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::ConstIterator (ConstIterator const & it)
+
+inline
+
+ +
+
+ +

◆ ConstIterator() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::ConstIterator (PredicateVector const & _vec,
int _start = 0 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ operator!=()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator!= (ConstIterator const & it) const
+
+inline
+
+ +
+
+ +

◆ operator*()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator* () const
+
+inline
+
+ +
+
+ +

◆ operator++() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator++ ()
+
+inline
+
+ +
+
+ +

◆ operator++() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator++ (int )
+
+inline
+
+ +
+
+ +

◆ operator--() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator-- ()
+
+inline
+
+ +
+
+ +

◆ operator--() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator-- (int )
+
+inline
+
+ +
+
+ +

◆ operator==()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator== (ConstIterator const & it) const
+
+inline
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator-members.html b/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator-members.html new file mode 100644 index 00000000..ca3ff04a --- /dev/null +++ b/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator-members.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator Member List
+
+ + + + + diff --git a/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator.html b/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator.html new file mode 100644 index 00000000..42a06938 --- /dev/null +++ b/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator.html @@ -0,0 +1,451 @@ + + + + + + + +Cutlass: cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator Class Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator Class Reference
+
+
+ +

An iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates. +

+ +

#include <predicate_vector.h>

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE Iterator (Iterator const &it)
 Copy constructor. More...
 
CUTLASS_HOST_DEVICE Iterator (PredicateVector &_vec, int _start=0)
 Constructs an iterator from a PredicateVector. More...
 
CUTLASS_HOST_DEVICE Iterator & operator++ ()
 Pre-increment. More...
 
CUTLASS_HOST_DEVICE Iterator & operator-- ()
 Pre-decrement. More...
 
CUTLASS_HOST_DEVICE Iterator operator++ (int)
 Post-increment. More...
 
CUTLASS_HOST_DEVICE Iterator operator-- (int)
 Post-decrement. More...
 
CUTLASS_HOST_DEVICE bool operator== (Iterator const &it) const
 Returns true if iterators point to the same bit. More...
 
CUTLASS_HOST_DEVICE bool operator!= (Iterator const &it) const
 Returns false if iterators point to the same bit. More...
 
CUTLASS_HOST_DEVICE bool get ()
 Gets the bit at the pointed to location. More...
 
CUTLASS_HOST_DEVICE bool operator* () const
 Dereferences iterator. More...
 
CUTLASS_HOST_DEVICE void set (bool value=true)
 Sets the bit at the pointed to location. More...
 
+

Constructor & Destructor Documentation

+ +

◆ Iterator() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::Iterator (Iterator const & it)
+
+inline
+
+ +
+
+ +

◆ Iterator() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::Iterator (PredicateVector & _vec,
int _start = 0 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ get()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::get ()
+
+inline
+
+ +
+
+ +

◆ operator!=()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator!= (Iterator const & it) const
+
+inline
+
+ +
+
+ +

◆ operator*()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator* () const
+
+inline
+
+ +
+
+ +

◆ operator++() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Iterator& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator++ ()
+
+inline
+
+ +
+
+ +

◆ operator++() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Iterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator++ (int )
+
+inline
+
+ +
+
+ +

◆ operator--() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Iterator& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator-- ()
+
+inline
+
+ +
+
+ +

◆ operator--() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Iterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator-- (int )
+
+inline
+
+ +
+
+ +

◆ operator==()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator== (Iterator const & it) const
+
+inline
+
+ +
+
+ +

◆ set()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::set (bool value = true)
+
+inline
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/classcutlass_1_1TensorRef-members.html b/docs/generated-html/classcutlass_1_1TensorRef-members.html new file mode 100644 index 00000000..4bf37ad1 --- /dev/null +++ b/docs/generated-html/classcutlass_1_1TensorRef-members.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::TensorRef< Storage_, Rank_ > Member List
+
+
+ +

This is the complete list of members for cutlass::TensorRef< Storage_, Rank_ >, including all inherited members.

+ + + + + + + + + + + + + + + + + + + + +
advance(Coord< Rank > const &b)cutlass::TensorRef< Storage_, Rank_ >inline
at(Coord< Rank > const &coord) constcutlass::TensorRef< Storage_, Rank_ >inline
at(int idx) constcutlass::TensorRef< Storage_, Rank_ >inline
convert()cutlass::TensorRef< Storage_, Rank_ >inline
data() constcutlass::TensorRef< Storage_, Rank_ >inline
good() constcutlass::TensorRef< Storage_, Rank_ >inline
leading_dim() constcutlass::TensorRef< Storage_, Rank_ >inline
offset(Coord< Rank > const &coord) constcutlass::TensorRef< Storage_, Rank_ >inline
operator+(Coord< Rank > const &b) constcutlass::TensorRef< Storage_, Rank_ >inline
operator-(Coord< Rank > const &b) constcutlass::TensorRef< Storage_, Rank_ >inline
operator[](Coord< Rank > const &coord) constcutlass::TensorRef< Storage_, Rank_ >inline
operator[](int idx) constcutlass::TensorRef< Storage_, Rank_ >inline
Rankcutlass::TensorRef< Storage_, Rank_ >static
reset(Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))cutlass::TensorRef< Storage_, Rank_ >inline
Storage typedefcutlass::TensorRef< Storage_, Rank_ >
stride() constcutlass::TensorRef< Storage_, Rank_ >inline
stride(int dim) constcutlass::TensorRef< Storage_, Rank_ >inline
TensorRef()cutlass::TensorRef< Storage_, Rank_ >inline
TensorRef(Storage *ptr, Coord< Rank > stride)cutlass::TensorRef< Storage_, Rank_ >inline
+ + + + diff --git a/docs/generated-html/classcutlass_1_1TensorRef.html b/docs/generated-html/classcutlass_1_1TensorRef.html new file mode 100644 index 00000000..05a9b3dd --- /dev/null +++ b/docs/generated-html/classcutlass_1_1TensorRef.html @@ -0,0 +1,704 @@ + + + + + + + +Cutlass: cutlass::TensorRef< Storage_, Rank_ > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::TensorRef< Storage_, Rank_ > Class Template Reference
+
+
+ +

Structure modeling a pointer and stride into a tensor. +

+ +

#include <tensor_ref.h>

+ + + + + +

+Public Types

typedef Storage_ Storage
 Data type of individual access. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE TensorRef ()
 Default ctor. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, Coord< Rank > stride)
 Constructs from a pointer and stride. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))
 Updates the pointer, stride, and location within a TensorRef. More...
 
template<typename T >
TensorRef< T, Rank > convert ()
 Conversion function. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the TensorRef may be safely accessed. More...
 
CUTLASS_HOST_DEVICE Storage * data () const
 Returns the pointer to referenced data. More...
 
CUTLASS_HOST_DEVICE Coord< Rank > const & stride () const
 Returns the stride of the tensor. More...
 
CUTLASS_HOST_DEVICE int const & stride (int dim) const
 Returns the stride of the tensor in the given dimension. More...
 
CUTLASS_HOST_DEVICE int leading_dim () const
 Returns the maximum stride element as the 'leading dimension'. More...
 
CUTLASS_HOST_DEVICE long long offset (Coord< Rank > const &coord) const
 Computes the offset of an index from the origin of the tensor. More...
 
CUTLASS_HOST_DEVICE Storage & at (Coord< Rank > const &coord) const
 Returns a reference to the element at a given Coord. More...
 
Storage & operator[] (Coord< Rank > const &coord) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE Storage & at (int idx) const
 Returns a reference to the element at a given Coord. More...
 
Storage & operator[] (int idx) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE TensorRef & advance (Coord< Rank > const &b)
 Adds an offset to the pointer. More...
 
CUTLASS_HOST_DEVICE TensorRef operator+ (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRef operator- (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
+ + + + +

+Static Public Attributes

static int const Rank = Rank_
 Rank of tensor. More...
 
+

Member Typedef Documentation

+ +

◆ Storage

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + +
typedef Storage_ cutlass::TensorRef< Storage_, Rank_ >::Storage
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ TensorRef() [1/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_ >::TensorRef ()
+
+inline
+
+ +
+
+ +

◆ TensorRef() [2/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_ >::TensorRef (Storage * ptr,
Coord< Rank > stride 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ advance()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef& cutlass::TensorRef< Storage_, Rank_ >::advance (Coord< Rank > const & b)
+
+inline
+
+ +
+
+ +

◆ at() [1/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_ >::at (Coord< Rank > const & coord) const
+
+inline
+
+ +
+
+ +

◆ at() [2/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_ >::at (int idx) const
+
+inline
+
+ +
+
+ +

◆ convert()

+ +
+
+
+template<typename Storage_, int Rank_>
+
+template<typename T >
+ + + + + +
+ + + + + + + +
TensorRef<T, Rank> cutlass::TensorRef< Storage_, Rank_ >::convert ()
+
+inline
+
+ +
+
+ +

◆ data()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Storage* cutlass::TensorRef< Storage_, Rank_ >::data () const
+
+inline
+
+ +
+
+ +

◆ good()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::TensorRef< Storage_, Rank_ >::good () const
+
+inline
+
+ +
+
+ +

◆ leading_dim()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE int cutlass::TensorRef< Storage_, Rank_ >::leading_dim () const
+
+inline
+
+ +
+
+ +

◆ offset()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE long long cutlass::TensorRef< Storage_, Rank_ >::offset (Coord< Rank > const & coord) const
+
+inline
+
+ +
+
+ +

◆ operator+()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef cutlass::TensorRef< Storage_, Rank_ >::operator+ (Coord< Rank > const & b) const
+
+inline
+
+ +
+
+ +

◆ operator-()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef cutlass::TensorRef< Storage_, Rank_ >::operator- (Coord< Rank > const & b) const
+
+inline
+
+ +
+
+ +

◆ operator[]() [1/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
Storage& cutlass::TensorRef< Storage_, Rank_ >::operator[] (Coord< Rank > const & coord) const
+
+inline
+
+ +
+
+ +

◆ operator[]() [2/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
Storage& cutlass::TensorRef< Storage_, Rank_ >::operator[] (int idx) const
+
+inline
+
+ +
+
+ +

◆ reset()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::TensorRef< Storage_, Rank_ >::reset (Storage * ptr = nullptr,
Coord< Rank > stride = Coord<Rank>(0) 
)
+
+inline
+
+ +
+
+ +

◆ stride() [1/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Coord<Rank> const& cutlass::TensorRef< Storage_, Rank_ >::stride () const
+
+inline
+
+ +
+
+ +

◆ stride() [2/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE int const& cutlass::TensorRef< Storage_, Rank_ >::stride (int dim) const
+
+inline
+
+ +
+
+

Member Data Documentation

+ +

◆ Rank

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + +
int const cutlass::TensorRef< Storage_, Rank_ >::Rank = Rank_
+
+static
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/classcutlass_1_1TensorView-members.html b/docs/generated-html/classcutlass_1_1TensorView-members.html new file mode 100644 index 00000000..e9401f9c --- /dev/null +++ b/docs/generated-html/classcutlass_1_1TensorView-members.html @@ -0,0 +1,125 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::TensorView< T > Member List
+
+
+ +

This is the complete list of members for cutlass::TensorView< T >, including all inherited members.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
advance(Coord< Rank > const &b)cutlass::TensorRef< T, 4 >inline
at(Coord_t const &coord) constcutlass::TensorView< T >inline
at(Offset_t idx) constcutlass::TensorView< T >inline
Base typedefcutlass::TensorView< T >
const_ref()cutlass::TensorView< T >inline
ConstTensorRef_t typedefcutlass::TensorView< T >
contains(Coord_t const &coord) constcutlass::TensorView< T >inline
convert()cutlass::TensorRef< T, 4 >inline
Coord_t typedefcutlass::TensorView< T >
data() constcutlass::TensorView< T >inline
good() constcutlass::TensorView< T >inline
leading_dim() constcutlass::TensorRef< T, 4 >inline
offset(Coord_t const &coord) constcutlass::TensorView< T >inline
Offset_t typedefcutlass::TensorView< T >
operator+(Coord< Rank > const &b) constcutlass::TensorRef< T, 4 >inline
operator-(Coord< Rank > const &b) constcutlass::TensorRef< T, 4 >inline
operator=(TensorView const &_tensor)cutlass::TensorView< T >inline
operator[](Coord< Rank > const &coord) constcutlass::TensorView< T >inline
TensorRef< T, 4 >::operator[](int idx) constcutlass::TensorRef< T, 4 >inline
Rankcutlass::TensorView< T >static
ref()cutlass::TensorView< T >inline
ref() constcutlass::TensorView< T >inline
reset(TensorRef_t const &_ref=TensorRef_t(0), Coord_t const &_size=Coord_t())cutlass::TensorView< T >inline
TensorRef< T, 4 >::reset(Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))cutlass::TensorRef< T, 4 >inline
size() constcutlass::TensorView< T >inline
size(int dim) constcutlass::TensorView< T >inline
Storage typedefcutlass::TensorRef< T, 4 >
stride() constcutlass::TensorView< T >inline
stride(int dim) constcutlass::TensorView< T >inline
subview(Coord_t const &location, Coord_t size) constcutlass::TensorView< T >inline
TensorRef()cutlass::TensorRef< T, 4 >inline
TensorRef(Storage *ptr, Coord< Rank > stride)cutlass::TensorRef< T, 4 >inline
TensorRef_t typedefcutlass::TensorView< T >
TensorView()cutlass::TensorView< T >inline
TensorView(TensorRef_t const &_ref, Coord_t const &_size)cutlass::TensorView< T >inline
+ + + + diff --git a/docs/generated-html/classcutlass_1_1TensorView.html b/docs/generated-html/classcutlass_1_1TensorView.html new file mode 100644 index 00000000..7dba2322 --- /dev/null +++ b/docs/generated-html/classcutlass_1_1TensorView.html @@ -0,0 +1,915 @@ + + + + + + + +Cutlass: cutlass::TensorView< T > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::TensorView< T > Class Template Reference
+
+
+ +

Host-side reference implementation of tensor operations. +

+ +

#include <tensor_view.h>

+
+Inheritance diagram for cutlass::TensorView< T >:
+
+
+ + +cutlass::TensorRef< T, 4 > + +
+ + + + + + + + + + + + + + + + + + + + + +

+Public Types

typedef TensorRef< T, 4 > Base
 Reference and stride. More...
 
typedef Base TensorRef_t
 Reference and stride. More...
 
typedef TensorRef< T const, 4 > ConstTensorRef_t
 Reference to constant type. More...
 
typedef int Offset_t
 Type used to compute the offset of an element to the base of a tensor. More...
 
typedef Coord< Rank > Coord_t
 Coordinate into tensor. More...
 
- Public Types inherited from cutlass::TensorRef< T, 4 >
typedef T Storage
 Data type of individual access. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE TensorView ()
 Default constructor. More...
 
CUTLASS_HOST_DEVICE TensorView (TensorRef_t const &_ref, Coord_t const &_size)
 Constructs a Tensor_view from a TensorRef and size. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the Tensor_view is bound to some memory. More...
 
CUTLASS_HOST_DEVICE T * data () const
 Returns a pointer to data. More...
 
CUTLASS_HOST_DEVICE void reset (TensorRef_t const &_ref=TensorRef_t(0), Coord_t const &_size=Coord_t())
 Updates the reference and size of a Tensor_view object. More...
 
CUTLASS_HOST_DEVICE TensorRef_t & ref ()
 Accesses the tensor reference pointing to data. More...
 
CUTLASS_HOST_DEVICE ConstTensorRef_t const_ref ()
 
CUTLASS_HOST_DEVICE TensorRef_t const & ref () const
 Accesses the tensor reference pointing to data. More...
 
CUTLASS_HOST_DEVICE Coord_t const & size () const
 Accesses the size. More...
 
CUTLASS_HOST_DEVICE int size (int dim) const
 Accesses the size. More...
 
CUTLASS_HOST_DEVICE Coord_t const & stride () const
 Accesses the stride. More...
 
CUTLASS_HOST_DEVICE int const & stride (int dim) const
 Accesses the stride. More...
 
CUTLASS_HOST_DEVICE TensorView & operator= (TensorView const &_tensor)
 Assigns the Tensor_view. More...
 
CUTLASS_HOST_DEVICE Offset_t offset (Coord_t const &coord) const
 Returns the index of an element. More...
 
CUTLASS_HOST_DEVICE bool contains (Coord_t const &coord) const
 Determines whether a location is within a tensor. More...
 
CUTLASS_HOST_DEVICE T & at (Coord_t const &coord) const
 Element-wise accessor. More...
 
T & operator[] (Coord< Rank > const &coord) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE T & at (Offset_t idx) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE TensorView< T > subview (Coord_t const &location, Coord_t size) const
 Returns a Tensor_view given location and size quantities. More...
 
- Public Member Functions inherited from cutlass::TensorRef< T, 4 >
CUTLASS_HOST_DEVICE TensorRef ()
 Default ctor. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, Coord< Rank > stride)
 Constructs from a pointer and stride. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))
 Updates the pointer, stride, and location within a TensorRef. More...
 
TensorRef< T, Rank > convert ()
 Conversion function. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the TensorRef may be safely accessed. More...
 
CUTLASS_HOST_DEVICE Storagedata () const
 Returns the pointer to referenced data. More...
 
CUTLASS_HOST_DEVICE Coord< Rank > const & stride () const
 Returns the stride of the tensor. More...
 
CUTLASS_HOST_DEVICE int const & stride (int dim) const
 Returns the stride of the tensor in the given dimension. More...
 
CUTLASS_HOST_DEVICE int leading_dim () const
 Returns the maximum stride element as the 'leading dimension'. More...
 
CUTLASS_HOST_DEVICE long long offset (Coord< Rank > const &coord) const
 Computes the offset of an index from the origin of the tensor. More...
 
CUTLASS_HOST_DEVICE Storageat (Coord< Rank > const &coord) const
 Returns a reference to the element at a given Coord. More...
 
CUTLASS_HOST_DEVICE Storageat (int idx) const
 Returns a reference to the element at a given Coord. More...
 
Storageoperator[] (Coord< Rank > const &coord) const
 Element-wise accessor. More...
 
Storageoperator[] (int idx) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE TensorRefadvance (Coord< Rank > const &b)
 Adds an offset to the pointer. More...
 
CUTLASS_HOST_DEVICE TensorRef operator+ (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRef operator- (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
+ + + + + + + + +

+Static Public Attributes

static int const Rank = TensorRef_t::Rank
 Rank of tensor. More...
 
- Static Public Attributes inherited from cutlass::TensorRef< T, 4 >
static int const Rank
 Rank of tensor. More...
 
+

Member Typedef Documentation

+ +

◆ Base

+ +
+
+
+template<typename T>
+ + + + +
typedef TensorRef<T, 4> cutlass::TensorView< T >::Base
+
+ +
+
+ +

◆ ConstTensorRef_t

+ +
+
+
+template<typename T>
+ + + + +
typedef TensorRef<T const, 4> cutlass::TensorView< T >::ConstTensorRef_t
+
+ +
+
+ +

◆ Coord_t

+ +
+
+
+template<typename T>
+ + + + +
typedef Coord<Rank> cutlass::TensorView< T >::Coord_t
+
+ +
+
+ +

◆ Offset_t

+ +
+
+
+template<typename T>
+ + + + +
typedef int cutlass::TensorView< T >::Offset_t
+
+ +
+
+ +

◆ TensorRef_t

+ +
+
+
+template<typename T>
+ + + + +
typedef Base cutlass::TensorView< T >::TensorRef_t
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ TensorView() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorView< T >::TensorView ()
+
+inline
+
+ +
+
+ +

◆ TensorView() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorView< T >::TensorView (TensorRef_t const & _ref,
Coord_t const & _size 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ at() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE T& cutlass::TensorView< T >::at (Coord_t const & coord) const
+
+inline
+
+ +
+
+ +

◆ at() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE T& cutlass::TensorView< T >::at (Offset_t idx) const
+
+inline
+
+ +
+
+ +

◆ const_ref()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstTensorRef_t cutlass::TensorView< T >::const_ref ()
+
+inline
+
+ +
+
+ +

◆ contains()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::TensorView< T >::contains (Coord_t const & coord) const
+
+inline
+
+ +
+
+ +

◆ data()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE T* cutlass::TensorView< T >::data () const
+
+inline
+
+ +
+
+ +

◆ good()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::TensorView< T >::good () const
+
+inline
+
+ +
+
+ +

◆ offset()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Offset_t cutlass::TensorView< T >::offset (Coord_t const & coord) const
+
+inline
+
+ +
+
+ +

◆ operator=()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorView& cutlass::TensorView< T >::operator= (TensorView< T > const & _tensor)
+
+inline
+
+ +
+
+ +

◆ operator[]()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
T& cutlass::TensorView< T >::operator[] (Coord< Rank > const & coord) const
+
+inline
+
+ +
+
+ +

◆ ref() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE TensorRef_t& cutlass::TensorView< T >::ref ()
+
+inline
+
+ +
+
+ +

◆ ref() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE TensorRef_t const& cutlass::TensorView< T >::ref () const
+
+inline
+
+ +
+
+ +

◆ reset()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::TensorView< T >::reset (TensorRef_t const & _ref = TensorRef_t(0),
Coord_t const & _size = Coord_t() 
)
+
+inline
+
+ +
+
+ +

◆ size() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Coord_t const& cutlass::TensorView< T >::size () const
+
+inline
+
+ +
+
+ +

◆ size() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE int cutlass::TensorView< T >::size (int dim) const
+
+inline
+
+ +
+
+ +

◆ stride() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Coord_t const& cutlass::TensorView< T >::stride () const
+
+inline
+
+ +
+
+ +

◆ stride() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE int const& cutlass::TensorView< T >::stride (int dim) const
+
+inline
+
+ +
+
+ +

◆ subview()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE TensorView<T> cutlass::TensorView< T >::subview (Coord_t const & location,
Coord_t size 
) const
+
+inline
+
+ +
+
+

Member Data Documentation

+ +

◆ Rank

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + +
int const cutlass::TensorView< T >::Rank = TensorRef_t::Rank
+
+static
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/classcutlass_1_1TensorView.png b/docs/generated-html/classcutlass_1_1TensorView.png new file mode 100644 index 00000000..40500e8a Binary files /dev/null and b/docs/generated-html/classcutlass_1_1TensorView.png differ diff --git a/docs/generated-html/classcutlass_1_1platform_1_1unique__ptr-members.html b/docs/generated-html/classcutlass_1_1platform_1_1unique__ptr-members.html new file mode 100644 index 00000000..696f4788 --- /dev/null +++ b/docs/generated-html/classcutlass_1_1platform_1_1unique__ptr-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::platform::unique_ptr< T, Deleter > Member List
+
+ + + + + diff --git a/docs/generated-html/classcutlass_1_1platform_1_1unique__ptr.html b/docs/generated-html/classcutlass_1_1platform_1_1unique__ptr.html new file mode 100644 index 00000000..cf455f2e --- /dev/null +++ b/docs/generated-html/classcutlass_1_1platform_1_1unique__ptr.html @@ -0,0 +1,554 @@ + + + + + + + +Cutlass: cutlass::platform::unique_ptr< T, Deleter > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::platform::unique_ptr< T, Deleter > Class Template Reference
+
+
+ +

std::unique_ptr +

+ +

#include <platform.h>

+ + + + + + + + +

+Public Types

typedef T * pointer
 
typedef T element_type
 
typedef Deleter deleter_type
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

 unique_ptr ()
 
 unique_ptr (pointer p)
 
 ~unique_ptr ()
 
pointer get () const noexcept
 Returns a pointer to the managed object or nullptr if no object is owned. More...
 
pointer release () noexcept
 Releases ownership of the managed object, if any. More...
 
void reset (pointer p=pointer()) noexcept
 Replaces the managed object, deleting the old object. More...
 
void swap (unique_ptr &other) noexcept
 Swaps the managed objects with *this and another unique_ptr. More...
 
Deleter & get_deleter () noexcept
 Returns the deleter object. More...
 
Deleter const & get_deleter () const noexcept
 Returns the deleter object. More...
 
 operator bool () const noexcept
 Checks whether an object is owned. More...
 
T & operator* () const
 Dereferences the unique_ptr. More...
 
pointer operator-> () const noexcept
 Returns a pointer to the managed object. More...
 
T & operator[] (size_t i) const
 Array access to managed object. More...
 
+

Member Typedef Documentation

+ +

◆ deleter_type

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + +
typedef Deleter cutlass::platform::unique_ptr< T, Deleter >::deleter_type
+
+ +
+
+ +

◆ element_type

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + +
typedef T cutlass::platform::unique_ptr< T, Deleter >::element_type
+
+ +
+
+ +

◆ pointer

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + +
typedef T* cutlass::platform::unique_ptr< T, Deleter >::pointer
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ unique_ptr() [1/2]

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
cutlass::platform::unique_ptr< T, Deleter >::unique_ptr ()
+
+inline
+
+ +
+
+ +

◆ unique_ptr() [2/2]

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + + +
cutlass::platform::unique_ptr< T, Deleter >::unique_ptr (pointer p)
+
+inline
+
+ +
+
+ +

◆ ~unique_ptr()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
cutlass::platform::unique_ptr< T, Deleter >::~unique_ptr ()
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ get()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer cutlass::platform::unique_ptr< T, Deleter >::get () const
+
+inline noexcept
+
+ +
+
+ +

◆ get_deleter() [1/2]

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
Deleter& cutlass::platform::unique_ptr< T, Deleter >::get_deleter ()
+
+inline noexcept
+
+ +
+
+ +

◆ get_deleter() [2/2]

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
Deleter const& cutlass::platform::unique_ptr< T, Deleter >::get_deleter () const
+
+inline noexcept
+
+ +
+
+ +

◆ operator bool()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
cutlass::platform::unique_ptr< T, Deleter >::operator bool () const
+
+inline noexcept
+
+ +
+
+ +

◆ operator*()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
T& cutlass::platform::unique_ptr< T, Deleter >::operator* () const
+
+inline
+
+ +
+
+ +

◆ operator->()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer cutlass::platform::unique_ptr< T, Deleter >::operator-> () const
+
+inline noexcept
+
+ +
+
+ +

◆ operator[]()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + + +
T& cutlass::platform::unique_ptr< T, Deleter >::operator[] (size_t i) const
+
+inline
+
+ +
+
+ +

◆ release()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer cutlass::platform::unique_ptr< T, Deleter >::release ()
+
+inline noexcept
+
+ +
+
+ +

◆ reset()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + + +
void cutlass::platform::unique_ptr< T, Deleter >::reset (pointer p = pointer())
+
+inline noexcept
+
+ +
+
+ +

◆ swap()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + + +
void cutlass::platform::unique_ptr< T, Deleter >::swap (unique_ptr< T, Deleter > & other)
+
+inline noexcept
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/classes.html b/docs/generated-html/classes.html new file mode 100644 index 00000000..9896653f --- /dev/null +++ b/docs/generated-html/classes.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: Class Index + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Class Index
+
+
+
a | b | c | d | e | f | g | h | i | l | m | n | p | r | s | t | u | v | w
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  a  
+
FragmentMultiplyAdd (cutlass::gemm)   IgemmEpilogueScalar (cutlass::gemm)   Load< Scalar_, Lanes_, Memory_, true, 8 > (cutlass)   GlobalLoadStreamBase::SharedStorage (cutlass::gemm)   
FragmentMultiplyAdd< half > (cutlass::gemm)   IgemmEpilogueScalar< int > (cutlass::gemm)   log2_down (cutlass)   SimplifiedGemmEpilogueTraits (cutlass::gemm)   
aligned_chunk (cutlass::platform)   FragmentStore (cutlass)   IgemmEpilogueTraits (cutlass::gemm)   log2_down< N, 1, Count > (cutlass)   SimplifiedGemmTraits (cutlass::gemm)   
aligned_storage (cutlass::platform)   FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmEpilogueTraitsHelper (cutlass::gemm)   log2_up (cutlass)   SimplifiedGemmTraitsHelper (cutlass::gemm)   
AlignedStruct (cutlass)   FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmFloatToInt8Converter (cutlass::gemm)   log2_up< N, 1, Count > (cutlass)   sqrt_est (cutlass)   
alignment_of (cutlass::platform)   
  g  
+
IgemmGlobalLoadTransformer (cutlass::gemm)   
  m  
+
StorageType (cutlass)   
alignment_of< const value_t > (cutlass::platform)   IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float > (cutlass::gemm)   StorageType< 1 > (cutlass)   
alignment_of< const volatile value_t > (cutlass::platform)   Gemm (cutlass::gemm)   IgemmGlobalStoreTransformer (cutlass::gemm)   GemmTraits::MainLoopSharedStorage (cutlass::gemm)   StorageType< 2 > (cutlass)   
alignment_of< double2 > (cutlass::platform)   GemmConfig (cutlass::gemm)   IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > > (cutlass::gemm)   MatrixLayout (cutlass)   StorageType< 4 > (cutlass)   
alignment_of< double4 > (cutlass::platform)   GemmDesc (cutlass::gemm)   IgemmInt8ToFloatConverter (cutlass::gemm)   MemorySpace (cutlass)   Store (cutlass)   
alignment_of< float4 > (cutlass::platform)   GemmEpilogue (cutlass::gemm)   IgemmSharedStoreTransformer (cutlass::gemm)   
  n  
+
Store< double, 2, Memory_, true, 16 > (cutlass)   
alignment_of< int4 > (cutlass::platform)   GemmEpilogueTraits (cutlass::gemm)   IgemmSwizzle (cutlass::gemm)   Store< Scalar_, Lanes_, Memory_, true, 16 > (cutlass)   
alignment_of< long4 > (cutlass::platform)   GemmEpilogueTraitsHelper (cutlass::gemm)   IgemmTileTraitsHelperA (cutlass::gemm)   nullptr_t (cutlass::platform)   Store< Scalar_, Lanes_, Memory_, true, 4 > (cutlass)   
alignment_of< longlong2 > (cutlass::platform)   GemmGlobalIteratorAb (cutlass::gemm)   IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   
  p  
+
Store< Scalar_, Lanes_, Memory_, true, 8 > (cutlass)   
alignment_of< longlong4 > (cutlass::platform)   GemmGlobalIteratorCd (cutlass::gemm)   IgemmTileTraitsHelperB (cutlass::gemm)   GemmTraits::StreamSharedStorage (cutlass::gemm)   
alignment_of< uint4 > (cutlass::platform)   GemmGlobalTileCdTraits (cutlass::gemm)   IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   alignment_of::pad (cutlass::platform)   GemmEpilogueTraits::StreamSharedStorage (cutlass::gemm)   
alignment_of< ulong4 > (cutlass::platform)   GemmGlobalTileTraits (cutlass::gemm)   IgemmTraits (cutlass::gemm)   WmmaGemmGlobalIteratorCd::Params (cutlass::gemm)   
  t  
+
alignment_of< ulonglong2 > (cutlass::platform)   GemmMultiplicandTraits (cutlass::gemm)   IgemmTraitsHelper (cutlass::gemm)   GemmTraits::Params (cutlass::gemm)   
alignment_of< ulonglong4 > (cutlass::platform)   GemmOperand (cutlass)   IgemmTransformerA (cutlass::gemm)   GlobalLoadStreamBase::Params (cutlass::gemm)   TensorRef (cutlass)   
alignment_of< volatile value_t > (cutlass::platform)   GemmOperandTraitsAb (cutlass::gemm)   IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   TileIteratorBase::Params (cutlass)   TensorView (cutlass)   
  b  
+
GemmSharedLoadTileATraits (cutlass::gemm)   IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   GemmGlobalIteratorCd::Params (cutlass::gemm)   ThreadMultiplyAdd (cutlass::gemm)   
GemmSharedLoadTileBTraits (cutlass::gemm)   IgemmTransformerB (cutlass::gemm)   TileLoadIterator::Params (cutlass)   ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > (cutlass::gemm)   
bool_constant (cutlass::platform)   GemmSharedLoadTileDTraits (cutlass::gemm)   IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   TileStoreIterator::Params (cutlass)   ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > (cutlass::gemm)   
  c  
+
GemmSharedStoreTileAbTraits (cutlass::gemm)   IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   GemmEpilogueTraits::Params (cutlass::gemm)   GemmSharedLoadTileBTraits::ThreadOffset (cutlass::gemm)   
GemmSharedStoreTileDTraits (cutlass::gemm)   integral_constant (cutlass::platform)   Gemm::Params (cutlass::gemm)   GemmGlobalTileCdTraits::ThreadOffset (cutlass::gemm)   
ClearAccumulators (cutlass::gemm)   GemmSharedStoreWithSkewTileAbTraits (cutlass::gemm)   is_arithmetic (cutlass::platform)   SharedLoadStream::Params (cutlass::gemm)   IgemmContiguousGlobalTileTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromShape (cutlass)   GemmTileTraitsHelperA (cutlass::gemm)   is_base_of (cutlass::platform)   LinearScaling::Params (cutlass::gemm)   GemmGlobalTileTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > (cutlass)   GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   is_base_of_helper (cutlass::platform)   GemmGlobalIteratorAb::Params (cutlass::gemm)   GemmSharedLoadTileDTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > (cutlass)   GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_floating_point (cutlass::platform)   plus (cutlass::platform)   GemmSharedLoadTileATraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromStrides (cutlass)   GemmTileTraitsHelperB (cutlass::gemm)   is_fundamental (cutlass::platform)   PredicateTileAdapter (cutlass)   GemmSharedStoreTileDTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > (cutlass)   GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   is_integral (cutlass::platform)   PredicateVector (cutlass)   HgemmCrosswiseGlobalTileTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > (cutlass)   GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_integral< char > (cutlass::platform)   ProjectOperand (cutlass::gemm)   GemmSharedStoreTileAbTraits::ThreadOffset (cutlass::gemm)   
ComputeThreadOffsetFromStrides (cutlass)   GemmTraits (cutlass::gemm)   is_integral< const T > (cutlass::platform)   ProjectOperand< GemmOperand::kA, Kstrided > (cutlass::gemm)   TileTraitsWarpRake::ThreadOffset (cutlass)   
ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > (cutlass)   GetExtent (cutlass::gemm)   is_integral< const volatile T > (cutlass::platform)   ProjectOperand< GemmOperand::kB, Kstrided > (cutlass::gemm)   GemmSharedStoreWithSkewTileAbTraits::ThreadOffset (cutlass::gemm)   
ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > (cutlass)   GetExtent< GemmOperand::kA, Tile_ > (cutlass::gemm)   is_integral< int > (cutlass::platform)   ProjectOperand< GemmOperand::kC, true > (cutlass::gemm)   WmmaGemmGlobalIteratorCdTraits::ThreadOffset (cutlass::gemm)   
conditional (cutlass::platform)   GetExtent< GemmOperand::kB, Tile_ > (cutlass::gemm)   is_integral< long > (cutlass::platform)   ProjectOperand< GemmOperand::kD, true > (cutlass::gemm)   TiledThreadOffset (cutlass)   
conditional< false, T, F > (cutlass::platform)   GemmTraits::GlobalLoadStream (cutlass::gemm)   is_integral< long long > (cutlass::platform)   
  r  
+
TileIteratorBase (cutlass)   
PredicateVector::ConstIterator (cutlass)   GlobalLoadStream (cutlass::gemm)   is_integral< short > (cutlass::platform)   TileLoadIterator (cutlass)   
ConstPredicateTileAdapter (cutlass)   GlobalLoadStreamBase (cutlass::gemm)   is_integral< signed char > (cutlass::platform)   remove_const (cutlass::platform)   TileStoreIterator (cutlass)   
Convert (cutlass)   greater (cutlass::platform)   is_integral< unsigned char > (cutlass::platform)   remove_const< const T > (cutlass::platform)   TileTraits (cutlass)   
Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > (cutlass)   
  h  
+
is_integral< unsigned int > (cutlass::platform)   remove_cv (cutlass::platform)   TileTraitsContiguousMajor (cutlass)   
Coord (cutlass)   is_integral< unsigned long > (cutlass::platform)   remove_volatile (cutlass::platform)   TileTraitsStandard (cutlass)   
Copy (cutlass)   HgemmConfig (cutlass::gemm)   is_integral< unsigned long long > (cutlass::platform)   remove_volatile< volatile T > (cutlass::platform)   TileTraitsStrideMajor (cutlass)   
  d  
+
HgemmCrosswiseGlobalTileTraits (cutlass::gemm)   is_integral< unsigned short > (cutlass::platform)   ReshapeThreads (cutlass::gemm)   TileTraitsWarpRake (cutlass)   
HgemmSwizzle (cutlass::gemm)   is_integral< volatile T > (cutlass::platform)   ReshapeThreads< Tile_, Threads_, true > (cutlass::gemm)   PredicateVector::TrivialIterator (cutlass)   
default_delete (cutlass::platform)   HgemmTileTraitsHelperA (cutlass::gemm)   is_pointer (cutlass::platform)   ReshapeTile (cutlass)   TrivialPredicateTileAdapter (cutlass)   
default_delete< T[]> (cutlass::platform)   HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_pointer_helper (cutlass::platform)   ReshapeTile< Tile_, kAccessSize_, true > (cutlass)   
  u  
+
DgemmConfig (cutlass::gemm)   HgemmTileTraitsHelperB (cutlass::gemm)   is_pointer_helper< T * > (cutlass::platform)   
  s  
+
DgemmTraits (cutlass::gemm)   HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   is_pow2 (cutlass)   unique_ptr (cutlass::platform)   
divide_assert (cutlass)   HgemmTraits (cutlass::gemm)   is_same (cutlass::platform)   SgemmConfig (cutlass::gemm)   
  v  
+
is_base_of_helper::dummy (cutlass::platform)   HgemmTraitsHelper (cutlass::gemm)   is_same< A, A > (cutlass::platform)   SgemmTraits (cutlass::gemm)   
  e  
+
HgemmTransformerA (cutlass::gemm)   is_trivially_copyable (cutlass::platform)   Shape (cutlass)   Vector (cutlass)   
HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   is_void (cutlass::platform)   ShapeAdd (cutlass)   Vector< half, kLanes_ > (cutlass)   
enable_if (cutlass::platform)   HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   is_volatile (cutlass::platform)   ShapeCount (cutlass)   Vectorize (cutlass)   
enable_if< false, T > (cutlass::platform)   HgemmTransformerB (cutlass::gemm)   is_volatile< volatile T > (cutlass::platform)   ShapeDiv (cutlass)   Vectorize< Element_, 1 > (cutlass)   
Extent (cutlass)   HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   PredicateVector::Iterator (cutlass)   ShapeMax (cutlass)   VectorTraits (cutlass)   
Extent< Vector< T, Lanes > > (cutlass)   HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   IteratorAdvance (cutlass)   ShapeMin (cutlass)   VectorTraits< Vector< T, Lanes > > (cutlass)   
Extent< Vector< T, Lanes > const > (cutlass)   
  i  
+
IteratorFragment (cutlass)   ShapeMul (cutlass)   VectorTraits< Vector< T, Lanes > const > (cutlass)   
  f  
+
  l  
+
ShapeScale (cutlass)   
  w  
+
Identity (cutlass)   ShapeStrides (cutlass)   
Fragment (cutlass)   IdentityBlockSwizzle (cutlass::gemm)   less (cutlass::platform)   ShapeSub (cutlass)   WmmaGemmGlobalIteratorCd (cutlass::gemm)   
FragmentConstIterator (cutlass)   IgemmConfig (cutlass::gemm)   LinearScaling (cutlass::gemm)   GemmTraits::SharedLoadStream (cutlass::gemm)   WmmaGemmGlobalIteratorCdTraits (cutlass::gemm)   
FragmentIterator (cutlass)   IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ > (cutlass::gemm)   Load (cutlass)   SharedLoadStream (cutlass::gemm)   
FragmentLoad (cutlass)   IgemmContiguousGlobalTileTraits (cutlass::gemm)   Load< double, 2, Memory_, true, 16 > (cutlass)   ClearAccumulators::SharedStorage (cutlass::gemm)   
FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmEpilogue (cutlass::gemm)   Load< Scalar_, Lanes_, Memory_, true, 16 > (cutlass)   GemmEpilogueTraits::SharedStorage (cutlass::gemm)   
FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmEpilogue< GemmEpilogueTraits_, true > (cutlass::gemm)   Load< Scalar_, Lanes_, Memory_, true, 4 > (cutlass)   GemmTraits::SharedStorage (cutlass::gemm)   
+
a | b | c | d | e | f | g | h | i | l | m | n | p | r | s | t | u | v | w
+
+ + + + diff --git a/docs/generated-html/classnv__std_1_1unique__ptr-members.html b/docs/generated-html/classnv__std_1_1unique__ptr-members.html new file mode 100644 index 00000000..5c9df4c5 --- /dev/null +++ b/docs/generated-html/classnv__std_1_1unique__ptr-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
nv_std::unique_ptr< T, Deleter > Member List
+
+ + + + + diff --git a/docs/generated-html/classnv__std_1_1unique__ptr.html b/docs/generated-html/classnv__std_1_1unique__ptr.html new file mode 100644 index 00000000..9abd8886 --- /dev/null +++ b/docs/generated-html/classnv__std_1_1unique__ptr.html @@ -0,0 +1,554 @@ + + + + + + + +Cutlass: nv_std::unique_ptr< T, Deleter > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
nv_std::unique_ptr< T, Deleter > Class Template Reference
+
+
+ +

std::unique_ptr +

+ +

#include <nv_std.h>

+ + + + + + + + +

+Public Types

typedef T * pointer
 
typedef T element_type
 
typedef Deleter deleter_type
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

 unique_ptr ()
 
 unique_ptr (pointer p)
 
 ~unique_ptr ()
 
pointer get () const noexcept
 Returns a pointer to the managed object or nullptr if no object is owned. More...
 
pointer release () noexcept
 Releases ownership of the managed object, if any. More...
 
void reset (pointer p=pointer()) noexcept
 Replaces the managed object, deleting the old object. More...
 
void swap (unique_ptr &other) noexcept
 Swaps the managed objects with *this and another unique_ptr. More...
 
Deleter & get_deleter () noexcept
 Returns the deleter object. More...
 
Deleter const & get_deleter () const noexcept
 Returns the deleter object. More...
 
 operator bool () const noexcept
 Checks whether an object is owned. More...
 
T & operator* () const
 Dereferences the unique_ptr. More...
 
pointer operator-> () const noexcept
 Returns a pointer to the managed object. More...
 
T & operator[] (size_t i) const
 Array access to managed object. More...
 
+

Member Typedef Documentation

+ +

◆ deleter_type

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + +
typedef Deleter nv_std::unique_ptr< T, Deleter >::deleter_type
+
+ +
+
+ +

◆ element_type

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + +
typedef T nv_std::unique_ptr< T, Deleter >::element_type
+
+ +
+
+ +

◆ pointer

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + +
typedef T* nv_std::unique_ptr< T, Deleter >::pointer
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ unique_ptr() [1/2]

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
nv_std::unique_ptr< T, Deleter >::unique_ptr ()
+
+inline
+
+ +
+
+ +

◆ unique_ptr() [2/2]

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + + +
nv_std::unique_ptr< T, Deleter >::unique_ptr (pointer p)
+
+inline
+
+ +
+
+ +

◆ ~unique_ptr()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
nv_std::unique_ptr< T, Deleter >::~unique_ptr ()
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ get()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer nv_std::unique_ptr< T, Deleter >::get () const
+
+inline noexcept
+
+ +
+
+ +

◆ get_deleter() [1/2]

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
Deleter& nv_std::unique_ptr< T, Deleter >::get_deleter ()
+
+inline noexcept
+
+ +
+
+ +

◆ get_deleter() [2/2]

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
Deleter const& nv_std::unique_ptr< T, Deleter >::get_deleter () const
+
+inline noexcept
+
+ +
+
+ +

◆ operator bool()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
nv_std::unique_ptr< T, Deleter >::operator bool () const
+
+inline noexcept
+
+ +
+
+ +

◆ operator*()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
T& nv_std::unique_ptr< T, Deleter >::operator* () const
+
+inline
+
+ +
+
+ +

◆ operator->()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer nv_std::unique_ptr< T, Deleter >::operator-> () const
+
+inline noexcept
+
+ +
+
+ +

◆ operator[]()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + + +
T& nv_std::unique_ptr< T, Deleter >::operator[] (size_t i) const
+
+inline
+
+ +
+
+ +

◆ release()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer nv_std::unique_ptr< T, Deleter >::release ()
+
+inline noexcept
+
+ +
+
+ +

◆ reset()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + + +
void nv_std::unique_ptr< T, Deleter >::reset (pointer p = pointer())
+
+inline noexcept
+
+ +
+
+ +

◆ swap()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + + +
void nv_std::unique_ptr< T, Deleter >::swap (unique_ptr< T, Deleter > & other)
+
+inline noexcept
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/clear__accumulators_8h.html b/docs/generated-html/clear__accumulators_8h.html new file mode 100644 index 00000000..b4bd3b39 --- /dev/null +++ b/docs/generated-html/clear__accumulators_8h.html @@ -0,0 +1,112 @@ + + + + + + + +Cutlass: clear_accumulators.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
clear_accumulators.h File Reference
+
+
+ +

Defines abstractions for efficiently clearing accumulator tiles. +More...

+
#include <cutlass/vector.h>
+
+

Go to the source code of this file.

+ + + + + + + +

+Classes

struct  cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >
 
struct  cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorage
 The shared storage. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/clear__accumulators_8h_source.html b/docs/generated-html/clear__accumulators_8h_source.html new file mode 100644 index 00000000..1a6f517f --- /dev/null +++ b/docs/generated-html/clear__accumulators_8h_source.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: clear_accumulators.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
clear_accumulators.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/vector.h>
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template <typename Scalar_, int kLanes_ = 1>
40  struct SharedStorage {};
41 
43  CUTLASS_DEVICE ClearAccumulators(SharedStorage& shared_storage) {}
44 
46  template <typename Fragment_>
47  CUTLASS_DEVICE void clear(Fragment_& fragment) {
48  fragment.clear();
49  }
50 };
51 
53 
54 } // namespace gemm
55 } // namespace cutlass
Definition: convert.h:33
+
Definition: clear_accumulators.h:38
+
CUTLASS_DEVICE ClearAccumulators(SharedStorage &shared_storage)
Ctor.
Definition: clear_accumulators.h:43
+
Defines a 1D vector of elements held in the registers of each thread.
+
CUTLASS_DEVICE void clear(Fragment_ &fragment)
Clear the fragment.
Definition: clear_accumulators.h:47
+
The shared storage.
Definition: clear_accumulators.h:40
+
+ + + + diff --git a/docs/generated-html/closed.png b/docs/generated-html/closed.png new file mode 100644 index 00000000..f820ec9c Binary files /dev/null and b/docs/generated-html/closed.png differ diff --git a/docs/generated-html/convert_8h.html b/docs/generated-html/convert_8h.html new file mode 100644 index 00000000..422c5201 --- /dev/null +++ b/docs/generated-html/convert_8h.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: convert.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
convert.h File Reference
+
+
+ +

Defines conversion operations among Fragments of different base type. +More...

+
#include <cutlass/fragment.h>
+
+

Go to the source code of this file.

+ + + + + + + + +

+Classes

struct  cutlass::Convert< InputFragment_, OutputFragment_ >
 
struct  cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >
 
struct  cutlass::Copy< Fragment_ >
 
+ + + +

+Namespaces

 cutlass
 
+
+ + + + diff --git a/docs/generated-html/convert_8h_source.html b/docs/generated-html/convert_8h_source.html new file mode 100644 index 00000000..6e877d29 --- /dev/null +++ b/docs/generated-html/convert_8h_source.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: convert.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
convert.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/fragment.h>
32 
33 namespace cutlass {
34 
36 
37 template <typename InputFragment_, typename OutputFragment_>
38 struct Convert {};
39 
41 
42 template <typename InputScalar_, typename OutputScalar_, int kScalars_>
43 struct Convert<Fragment<InputScalar_, kScalars_>, Fragment<OutputScalar_, kScalars_> > {
48 
50  CUTLASS_DEVICE Convert() {}
51 
53  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
54  transform(src, 0, dst);
55  }
56 
58  template <typename Fragment_>
59  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
60  for (int i = 0; i < kScalars_; ++i) {
61  dst[i] = static_cast<OutputScalar_>(src[i + offset]);
62  }
63  }
64 };
65 
67 
68 template <typename Fragment_>
69 struct Copy {
71  typedef Fragment_ InputFragment;
73  typedef Fragment_ OutputFragment;
74 
76  CUTLASS_DEVICE Copy() {}
77 
79  CUTLASS_DEVICE void transform(Fragment_ const& src, Fragment_& dst) { transform(src, 0, dst); }
80 
82  template <typename InputFragment_>
83  CUTLASS_DEVICE void transform(InputFragment_ const& src, int offset, Fragment_& dst) {
84  if (sizeof(typename Fragment_::Element) == 8) {
85  uint64_t const* src_ptr = reinterpret_cast<uint64_t const*>(&src[offset]);
86  uint64_t* dst_ptr = reinterpret_cast<uint64_t*>(&dst[0]);
87  for (int i = 0; i < sizeof(Fragment_) / 8; ++i) {
88  dst_ptr[i] = src_ptr[i];
89  }
90  } else {
91  uint32_t const* src_ptr = reinterpret_cast<uint32_t const*>(&src[offset]);
92  uint32_t* dst_ptr = reinterpret_cast<uint32_t*>(&dst[0]);
93  for (int i = 0; i < sizeof(Fragment_) / 4; ++i) {
94  dst_ptr[i] = src_ptr[i];
95  }
96  }
97  }
98 };
99 
101 
102 } // namespace cutlass
Definition: convert.h:33
+
Fragment< OutputScalar_, kScalars_ > OutputFragment
The output fragment.
Definition: convert.h:47
+
Definition: convert.h:69
+
CUTLASS_DEVICE void transform(Fragment_ const &src, Fragment_ &dst)
Transform a fragment.
Definition: convert.h:79
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
CUTLASS_DEVICE void transform(InputFragment_ const &src, int offset, Fragment_ &dst)
Transform a fragment.
Definition: convert.h:83
+ +
Fragment_ InputFragment
The input fragment.
Definition: convert.h:71
+
CUTLASS_DEVICE void transform(InputFragment const &src, OutputFragment &dst)
Transform a fragment.
Definition: convert.h:53
+
CUTLASS_DEVICE Copy()
Ctor.
Definition: convert.h:76
+
Fragment_ OutputFragment
The output fragment.
Definition: convert.h:73
+
Fragment< InputScalar_, kScalars_ > InputFragment
The input fragment.
Definition: convert.h:45
+
CUTLASS_DEVICE void transform(Fragment_ const &src, int offset, OutputFragment &dst)
Transform a fragment.
Definition: convert.h:59
+
Definition: convert.h:38
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers...
+
+ + + + diff --git a/docs/generated-html/coord_8h.html b/docs/generated-html/coord_8h.html new file mode 100644 index 00000000..51650386 --- /dev/null +++ b/docs/generated-html/coord_8h.html @@ -0,0 +1,139 @@ + + + + + + + +Cutlass: coord.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
coord.h File Reference
+
+
+ +

A Coord is a coordinate of arbitrary rank into a tensor or matrix. +More...

+
#include <cutlass/cutlass.h>
+
+

Go to the source code of this file.

+ + + + + + + + +

+Classes

struct  cutlass::Identity
 Describes identity elements. More...
 
struct  cutlass::Coord< N_ >
 Statically-sized array specifying Coords within a tensor. More...
 
+ + + +

+Namespaces

 cutlass
 
+ + + + + + + + + + + + + + + + + + + + + + + + + +

+Functions

CUTLASS_HOST_DEVICE Coord< 1 > cutlass::make_Coord (int _0)
 Helper to make a 1-element coordinate. More...
 
CUTLASS_HOST_DEVICE Coord< 2 > cutlass::make_Coord (int _0, int _1)
 Helper to make a 2-element coordinate. More...
 
CUTLASS_HOST_DEVICE Coord< 3 > cutlass::make_Coord (int _0, int _1, int _2)
 Helper to make a 3-element coordinate. More...
 
CUTLASS_HOST_DEVICE Coord< 4 > cutlass::make_Coord (int _0, int _1, int _2, int _3)
 Helper to make a 4-element coordinate. More...
 
CUTLASS_HOST_DEVICE Coord< 2 > cutlass::get_Coord_hw (Coord< 3 > const &coord)
 Getter. More...
 
CUTLASS_HOST_DEVICE Coord< 2 > cutlass::get_Coord_hw (Coord< 4 > const &coord)
 Getter. More...
 
CUTLASS_HOST_DEVICE Coord< 3 > cutlass::get_Coord_hwc (Coord< 4 > const &coord)
 Getter. More...
 
CUTLASS_HOST_DEVICE Coord< 3 > cutlass::get_Coord_dhw (Coord< 4 > const &coord)
 Getter. More...
 
+
+ + + + diff --git a/docs/generated-html/coord_8h_source.html b/docs/generated-html/coord_8h_source.html new file mode 100644 index 00000000..71ec92e1 --- /dev/null +++ b/docs/generated-html/coord_8h_source.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: coord.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
coord.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/cutlass.h>
32 
33 namespace cutlass {
34 
36 
38 struct Identity {
41  enum Kind { Additive = 0, Multiplicative = 1 };
42 };
43 
45 
47 template <int N_>
48 struct Coord {
49  //
50  // Type and constant definitions
51  //
52 
53  static int const N = N_;
54 
55  //
56  // Data members
57  //
58 
60  int idx[N];
61 
62  //
63  // Methods
64  //
65 
68  Coord(int value = 0) {
69  for (int i = 0; i < N; ++i) {
70  idx[i] = value;
71  }
72  }
73 
76  Coord(int _idx[]) {
77  for (int i = 0; i < N; ++i) {
78  idx[i] = _idx[i];
79  }
80  }
81 
84  Coord operator+(Coord const& b) const {
85  Coord c;
86  for (int i = 0; i < N; ++i) {
87  c.idx[i] = idx[i] + b.idx[i];
88  }
89  return c;
90  }
91 
94  Coord operator-(Coord const& b) const {
95  Coord c;
96  for (int i = 0; i < N; ++i) {
97  c.idx[i] = idx[i] - b.idx[i];
98  }
99  return c;
100  }
101 
104  Coord operator*(Coord const& b) const {
105  Coord c;
106  for (int i = 0; i < N; ++i) {
107  c.idx[i] = idx[i] * b.idx[i];
108  }
109  return c;
110  }
111 
114  Coord operator/(Coord const& b) const {
115  Coord c;
116  for (int i = 0; i < N; ++i) {
117  c.idx[i] = idx[i] / b.idx[i];
118  }
119  return c;
120  }
121 
124  Coord& operator+=(Coord const& b) {
125  for (int i = 0; i < N; ++i) {
126  idx[i] += b.idx[i];
127  }
128  return *this;
129  }
130 
133  Coord& operator-=(Coord const& b) {
134  for (int i = 0; i < N; ++i) {
135  idx[i] -= b.idx[i];
136  }
137  return *this;
138  }
139 
142  Coord& operator*=(Coord const& b) {
143  for (int i = 0; i < N; ++i) {
144  idx[i] *= b.idx[i];
145  }
146  return *this;
147  }
148 
151  Coord& operator/=(Coord const& b) {
152  for (int i = 0; i < N; ++i) {
153  idx[i] /= b.idx[i];
154  }
155  return *this;
156  }
157 
159  CUTLASS_HOST_DEVICE int& operator[](int dim) { return idx[dim]; }
160 
162  CUTLASS_HOST_DEVICE int const& operator[](int dim) const { return idx[dim]; }
163 
165  template <typename T>
166  CUTLASS_HOST_DEVICE T dot(Coord const& b, T sum) const {
167  for (int i = 0; i < N; ++i) {
168  sum += idx[i] * b.idx[i];
169  }
170  return sum;
171  }
172 
174  template <typename T>
175  CUTLASS_HOST_DEVICE T dot(Coord const& b) const {
176  T sum = T(0);
177  for (int i = 0; i < N; ++i) {
178  sum += idx[i] * b.idx[i];
179  }
180  return sum;
181  }
182 
184  template <int Dim>
186  return idx[Dim];
187  }
188 
191  int& at(int dim) { return idx[dim]; }
192 
194  template <int Dim>
195  CUTLASS_HOST_DEVICE int const& at() const {
196  return idx[Dim];
197  }
198 
201  int const& at(int dim) const { return idx[dim]; }
202 
205  bool operator==(Coord<N> const& b) const {
206  bool equal = true;
207  for (int i = 0; equal && i < N; ++i) {
208  equal = (idx[i] == b.idx[i]);
209  }
210  return equal;
211  }
212 
215  bool operator!=(Coord<N> const& b) const { return !(*this == b); }
216 
219  Coord& clamp(Coord<N> const& max, Coord<N> const& min = Coord<N>()) {
220  for (int i = 0; i < N; ++i) {
221  idx[i] = __NV_STD_MAX(__NV_STD_MIN(idx[i], max.idx[i]), min.idx[i]);
222  }
223  return *this;
224  }
225 
228  int count() const {
229  int product = idx[0];
230  for (int i = 1; i < N; ++i) {
231  product *= idx[i];
232  }
233  return product;
234  }
235 };
236 
238 
242  int values[1] = {_0};
243  return Coord<1>(values);
244 }
245 
248 Coord<2> make_Coord(int _0, int _1) {
249  int values[2] = {_0, _1};
250  return Coord<2>(values);
251 }
252 
255 Coord<3> make_Coord(int _0, int _1, int _2) {
256  int values[3] = {_0, _1, _2};
257  return Coord<3>(values);
258 }
259 
262 Coord<4> make_Coord(int _0, int _1, int _2, int _3) {
263  int values[4] = {_0, _1, _2, _3};
264  return Coord<4>(values);
265 }
266 
268 
271 Coord<2> get_Coord_hw(Coord<3> const& coord) { return make_Coord(coord[1], coord[2]); }
272 
275 Coord<2> get_Coord_hw(Coord<4> const& coord) { return make_Coord(coord[1], coord[2]); }
276 
279 Coord<3> get_Coord_hwc(Coord<4> const& coord) { return make_Coord(coord[1], coord[2], coord[3]); }
280 
283 Coord<3> get_Coord_dhw(Coord<4> const& coord) { return make_Coord(coord[0], coord[1], coord[2]); }
284 
286 
287 } // namespace cutlass
CUTLASS_HOST_DEVICE int const & operator[](int dim) const
Member access operator.
Definition: coord.h:162
+
CUTLASS_HOST_DEVICE int count() const
Returns the product of all elements.
Definition: coord.h:228
+
Describes identity elements.
Definition: coord.h:38
+
CUTLASS_HOST_DEVICE constexpr const T & max(const T &a, const T &b)
std::max
Definition: platform.h:207
+
Definition: convert.h:33
+
CUTLASS_HOST_DEVICE bool operator==(Coord< N > const &b) const
Determines if two Coord<> objects are equal.
Definition: coord.h:205
+
CUTLASS_HOST_DEVICE Coord & operator+=(Coord const &b)
In-place addition.
Definition: coord.h:124
+
CUTLASS_HOST_DEVICE bool operator!=(Coord< N > const &b) const
Not equal.
Definition: coord.h:215
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_hwc(Coord< 4 > const &coord)
Getter.
Definition: coord.h:279
+
CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_dhw(Coord< 4 > const &coord)
Getter.
Definition: coord.h:283
+
CUTLASS_HOST_DEVICE Coord & clamp(Coord< N > const &max, Coord< N > const &min=Coord< N >())
Clamps a coordinate to a range specified by maximum and minimum values.
Definition: coord.h:219
+
CUTLASS_HOST_DEVICE int const & at() const
Gets the index of a given Coord element.
Definition: coord.h:195
+
CUTLASS_HOST_DEVICE Coord operator/(Coord const &b) const
Element-wise division.
Definition: coord.h:114
+
Kind
Definition: coord.h:41
+
CUTLASS_HOST_DEVICE T dot(Coord const &b, T sum) const
Computes the dot product of two Coord instances.
Definition: coord.h:166
+
CUTLASS_HOST_DEVICE Coord(int _idx[])
Constructs from an array of integers.
Definition: coord.h:76
+
#define __NV_STD_MAX(a, b)
Select maximum(a, b)
Definition: platform.h:155
+
CUTLASS_HOST_DEVICE int & at(int dim)
Access via index; may limit unrolling potential.
Definition: coord.h:191
+
CUTLASS_HOST_DEVICE int & operator[](int dim)
Member access operator.
Definition: coord.h:159
+
CUTLASS_HOST_DEVICE Coord & operator-=(Coord const &b)
In-place subtraction.
Definition: coord.h:133
+
CUTLASS_HOST_DEVICE Coord operator*(Coord const &b) const
Element-wise multiplication.
Definition: coord.h:104
+
CUTLASS_HOST_DEVICE Coord(int value=0)
Default ctor initializes uniformly.
Definition: coord.h:68
+
CUTLASS_HOST_DEVICE Coord< 2 > get_Coord_hw(Coord< 3 > const &coord)
Getter.
Definition: coord.h:271
+
static int const N
Definition: coord.h:53
+
#define __NV_STD_MIN(a, b)
Select minimum(a, b)
Definition: platform.h:160
+
CUTLASS_HOST_DEVICE T dot(Coord const &b) const
Computes the dot product of two Coord instances.
Definition: coord.h:175
+
CUTLASS_HOST_DEVICE Coord operator-(Coord const &b) const
Element-wise subtraction.
Definition: coord.h:94
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
CUTLASS_HOST_DEVICE constexpr const T & min(const T &a, const T &b)
std::min
Definition: platform.h:201
+
Definition: coord.h:41
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
CUTLASS_HOST_DEVICE int & at()
Gets the index of a given Coord element.
Definition: coord.h:185
+
int idx[N]
Indices.
Definition: coord.h:60
+
Definition: coord.h:41
+
CUTLASS_HOST_DEVICE int const & at(int dim) const
Access via index; may limit unrolling potential.
Definition: coord.h:201
+
Basic include for CUTLASS macros.
+
CUTLASS_HOST_DEVICE Coord & operator*=(Coord const &b)
In-place multiplication.
Definition: coord.h:142
+
CUTLASS_HOST_DEVICE Coord operator+(Coord const &b) const
Element-wise addition.
Definition: coord.h:84
+
CUTLASS_HOST_DEVICE Coord & operator/=(Coord const &b)
In-place division.
Definition: coord.h:151
+
+ + + + diff --git a/docs/generated-html/core__io_8h.html b/docs/generated-html/core__io_8h.html new file mode 100644 index 00000000..d71c3971 --- /dev/null +++ b/docs/generated-html/core__io_8h.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: core_io.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
core_io.h File Reference
+
+
+ +

Helpers for printing cutlass/core objects. +More...

+
#include <iosfwd>
+#include <typeinfo>
+#include <cutlass/coord.h>
+
+

Go to the source code of this file.

+ + + + + +

+Functions

template<int Rank>
std::ostream & operator<< (std::ostream &out, cutlass::Coord< Rank > const &coord)
 
+

Function Documentation

+ +

◆ operator<<()

+ +
+
+
+template<int Rank>
+ + + + + + + + + + + + + + + + + + +
std::ostream& operator<< (std::ostream & out,
cutlass::Coord< Rank > const & coord 
)
+
+ +
+
+
+ + + + diff --git a/docs/generated-html/core__io_8h_source.html b/docs/generated-html/core__io_8h_source.html new file mode 100644 index 00000000..7c076c94 --- /dev/null +++ b/docs/generated-html/core__io_8h_source.html @@ -0,0 +1,88 @@ + + + + + + + +Cutlass: core_io.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
core_io.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 #pragma once
26 
31 #pragma once
32 
33 #include <iosfwd>
34 #include <typeinfo>
35 
36 #include <cutlass/coord.h>
37 
38 template <int Rank>
39 std::ostream& operator<<(std::ostream& out, cutlass::Coord<Rank> const& coord) {
40  for (int i = 0; i < Rank; ++i) {
41  out << (i ? ", " : "") << coord.idx[i];
42  }
43  return out;
44 }
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
+ + + + diff --git a/docs/generated-html/cutlass_8h.html b/docs/generated-html/cutlass_8h.html new file mode 100644 index 00000000..bbb0463c --- /dev/null +++ b/docs/generated-html/cutlass_8h.html @@ -0,0 +1,237 @@ + + + + + + + +Cutlass: cutlass.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass.h File Reference
+
+
+ +

Basic include for CUTLASS macros. +More...

+ +

Go to the source code of this file.

+ + + + +

+Namespaces

 cutlass
 
+ + + + + + + + + + + + + + + + + +

+Macros

#define CUTLASS_MAJOR   1
 
#define CUTLASS_MINOR   0
 
#define CUTLASS_PATCH   0
 
#define CUTLASS_VERSION   ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
 
#define CUTLASS_HOST_DEVICE
 
#define CUTLASS_PRAGMA_UNROLL
 
#define CUTLASS_PRAGMA_NO_UNROLL
 
#define CUTLASS_ASSERT(x)   assert(x)
 
+

Macro Definition Documentation

+ +

◆ CUTLASS_ASSERT

+ +
+
+ + + + + + + + +
#define CUTLASS_ASSERT( x)   assert(x)
+
+ +
+
+ +

◆ CUTLASS_HOST_DEVICE

+ +
+
+ + + + +
#define CUTLASS_HOST_DEVICE
+
+ +
+
+ +

◆ CUTLASS_MAJOR

+ +
+
+ + + + +
#define CUTLASS_MAJOR   1
+
+ +
+
+ +

◆ CUTLASS_MINOR

+ +
+
+ + + + +
#define CUTLASS_MINOR   0
+
+ +
+
+ +

◆ CUTLASS_PATCH

+ +
+
+ + + + +
#define CUTLASS_PATCH   0
+
+ +
+
+ +

◆ CUTLASS_PRAGMA_NO_UNROLL

+ +
+
+ + + + +
#define CUTLASS_PRAGMA_NO_UNROLL
+
+ +
+
+ +

◆ CUTLASS_PRAGMA_UNROLL

+ +
+
+ + + + +
#define CUTLASS_PRAGMA_UNROLL
+
+ +
+
+ +

◆ CUTLASS_VERSION

+ +
+
+ + + + +
#define CUTLASS_VERSION   ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
+
+ +
+
+
+ + + + diff --git a/docs/generated-html/cutlass_8h_source.html b/docs/generated-html/cutlass_8h_source.html new file mode 100644 index 00000000..d2f44229 --- /dev/null +++ b/docs/generated-html/cutlass_8h_source.html @@ -0,0 +1,88 @@ + + + + + + + +Cutlass: cutlass.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 
30 #pragma once
31 
33 
34 #define CUTLASS_MAJOR 1
35 #define CUTLASS_MINOR 0
36 #define CUTLASS_PATCH 0
37 #define CUTLASS_VERSION ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
38 
39 #ifdef __NVCC__
40 #define CUTLASS_HOST_DEVICE __forceinline__ __device__ __host__
41 #define CUTLASS_DEVICE __forceinline__ __device__
42 #elif defined(__CUDACC_RTC__)
43 #define CUTLASS_HOST_DEVICE __forceinline__ __device__
44 #define CUTLASS_DEVICE __forceinline__ __device__
45 #else
46 #define CUTLASS_HOST_DEVICE
47 // CUTLASS_DEVICE is an error if not compiling device code
48 #endif
49 
50 // CUTLASS_PRAGMA_UNROLL expands to an unroll pragma if supported by the compiler
51 #if defined(__CUDA_ARCH__)
52 #if defined(_MSC_VER)
53 #define CUTLASS_PRAGMA_UNROLL __pragma("unroll")
54 #define CUTLASS_PRAGMA_NO_UNROLL __pragma("unroll 1")
55 #else
56 #define CUTLASS_PRAGMA_UNROLL _Pragma("unroll")
57 #define CUTLASS_PRAGMA_NO_UNROLL _Pragma("unroll 1")
58 #endif
59 #else
60 #define CUTLASS_PRAGMA_UNROLL
61 #define CUTLASS_PRAGMA_NO_UNROLL
62 #endif
63 
64 #define CUTLASS_ASSERT(x) assert(x)
65 
66 namespace cutlass {
67 
69 static const int kWarpSize = 32;
70 
71 } // namespace cutlass
72 
Definition: convert.h:33
+
+ + + + diff --git a/docs/generated-html/cutlass__math_8h.html b/docs/generated-html/cutlass__math_8h.html new file mode 100644 index 00000000..953b0d4c --- /dev/null +++ b/docs/generated-html/cutlass__math_8h.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass_math.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass_math.h File Reference
+
+
+ +

Math utilities. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::is_pow2< N >
 
struct  cutlass::log2_down< N, CurrentVal, Count >
 
struct  cutlass::log2_down< N, 1, Count >
 
struct  cutlass::log2_up< N, CurrentVal, Count >
 
struct  cutlass::log2_up< N, 1, Count >
 
struct  cutlass::sqrt_est< N >
 
struct  cutlass::divide_assert< Dividend, Divisor >
 
+ + + +

+Namespaces

 cutlass
 
+ + + + + + + + + + +

+Functions

template<typename dividend_t , typename divisor_t >
CUTLASS_HOST_DEVICE dividend_t cutlass::round_nearest (dividend_t dividend, divisor_t divisor)
 
template<typename value_t >
CUTLASS_HOST_DEVICE value_t cutlass::gcd (value_t a, value_t b)
 
template<typename value_t >
CUTLASS_HOST_DEVICE value_t cutlass::lcm (value_t a, value_t b)
 
+
+ + + + diff --git a/docs/generated-html/cutlass__math_8h_source.html b/docs/generated-html/cutlass__math_8h_source.html new file mode 100644 index 00000000..2809a845 --- /dev/null +++ b/docs/generated-html/cutlass__math_8h_source.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: cutlass_math.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass_math.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 
26 #pragma once
27 
33 #include <cutlass/util/platform.h>
34 
35 namespace cutlass {
36 
37 /******************************************************************************
38  * Static math utilities
39  ******************************************************************************/
40 
44 template <int N>
45 struct is_pow2 : platform::integral_constant<bool, (N & (N - 1)) == 0> {};
46 
50 template <int N, int CurrentVal = N, int Count = 0>
51 struct log2_down {
53  enum { value = log2_down<N, (CurrentVal >> 1), Count + 1>::value };
54 };
55 
56 // Base case
57 template <int N, int Count>
58 struct log2_down<N, 1, Count> {
59  enum { value = Count };
60 };
61 
65 template <int N, int CurrentVal = N, int Count = 0>
66 struct log2_up {
68  enum { value = log2_up<N, (CurrentVal >> 1), Count + 1>::value };
69 };
70 
71 // Base case
72 template <int N, int Count>
73 struct log2_up<N, 1, Count> {
74  enum { value = ((1 << Count) < N) ? Count + 1 : Count };
75 };
76 
80 template <int N>
81 struct sqrt_est {
82  enum { value = 1 << (log2_up<N>::value / 2) };
83 };
84 
89 template <int Dividend, int Divisor>
90 struct divide_assert {
91  enum { value = Dividend / Divisor };
92 
93  static_assert((Dividend % Divisor == 0), "Not an even multiple");
94 };
95 
96 /******************************************************************************
97  * Rounding
98  ******************************************************************************/
99 
103 template <typename dividend_t, typename divisor_t>
104 CUTLASS_HOST_DEVICE dividend_t round_nearest(dividend_t dividend, divisor_t divisor) {
105  return ((dividend + divisor - 1) / divisor) * divisor;
106 }
107 
111 template <typename value_t>
112 CUTLASS_HOST_DEVICE value_t gcd(value_t a, value_t b) {
113  for (;;) {
114  if (a == 0) return b;
115  b %= a;
116  if (b == 0) return a;
117  a %= b;
118  }
119 }
120 
124 template <typename value_t>
125 CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b) {
126  value_t temp = gcd(a, b);
127 
128  return temp ? (a / temp * b) : 0;
129 }
130 
131 } // namespace cutlass
Definition: cutlass_math.h:91
+
Definition: convert.h:33
+
Definition: cutlass_math.h:51
+
C++ features that may be otherwise unimplemented for CUDA device functions.
+
Definition: cutlass_math.h:53
+
CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b)
Definition: cutlass_math.h:125
+
CUTLASS_HOST_DEVICE dividend_t round_nearest(dividend_t dividend, divisor_t divisor)
Definition: cutlass_math.h:104
+
Definition: cutlass_math.h:68
+
std::integral_constant
Definition: platform.h:274
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Definition: cutlass_math.h:82
+
CUTLASS_HOST_DEVICE value_t gcd(value_t a, value_t b)
Definition: cutlass_math.h:112
+
Definition: cutlass_math.h:90
+
Definition: cutlass_math.h:66
+
Definition: cutlass_math.h:45
+
Definition: cutlass_math.h:81
+
+ + + + diff --git a/docs/generated-html/debug_8h.html b/docs/generated-html/debug_8h.html new file mode 100644 index 00000000..1f88396a --- /dev/null +++ b/docs/generated-html/debug_8h.html @@ -0,0 +1,239 @@ + + + + + + + +Cutlass: debug.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
debug.h File Reference
+
+
+ +

Debugging and logging functionality. +More...

+
#include <stdio.h>
+
+

Go to the source code of this file.

+ + + + +

+Namespaces

 cutlass
 
+ + + + + + + + + + + + + + +

+Macros

#define CUDA_LOG(format, ...)   printf(format, __VA_ARGS__)
 
#define CUDA_LOG_DEBUG(format, ...)
 
#define CUDA_PERROR(e)   cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)
 Perror macro. More...
 
#define CUDA_PERROR_EXIT(e)
 Perror macro with exit. More...
 
#define CUDA_PERROR_DEBUG(e)   (e)
 Perror macro only if DEBUG is defined. More...
 
+ + + + +

+Functions

__host__ CUTLASS_DEVICE cudaError_t cutlass::cuda_perror_impl (cudaError_t error, const char *filename, int line)
 The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
 
+

Macro Definition Documentation

+ +

◆ CUDA_LOG

+ +
+
+ + + + + + + + + + + + + + + + + + +
#define CUDA_LOG( format,
 ... 
)   printf(format, __VA_ARGS__)
+
+

Formats and prints the given message to stdout

+ +
+
+ +

◆ CUDA_LOG_DEBUG

+ +
+
+ + + + + + + + + + + + + + + + + + +
#define CUDA_LOG_DEBUG( format,
 ... 
)
+
+

Formats and prints the given message to stdout only if DEBUG is defined

+ +
+
+ +

◆ CUDA_PERROR

+ +
+
+ + + + + + + + +
#define CUDA_PERROR( e)   cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)
+
+ +
+
+ +

◆ CUDA_PERROR_DEBUG

+ +
+
+ + + + + + + + +
#define CUDA_PERROR_DEBUG( e)   (e)
+
+ +
+
+ +

◆ CUDA_PERROR_EXIT

+ +
+
+ + + + + + + + +
#define CUDA_PERROR_EXIT( e)
+
+Value:
if (cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)) { \
exit(1); \
}
__host__ CUTLASS_DEVICE cudaError_t cuda_perror_impl(cudaError_t error, const char *filename, int line)
The corresponding error message is printed to stderr (or stdout in device code) along with the suppli...
Definition: debug.h:77
+
+
+
+
+ + + + diff --git a/docs/generated-html/debug_8h_source.html b/docs/generated-html/debug_8h_source.html new file mode 100644 index 00000000..881b4e3f --- /dev/null +++ b/docs/generated-html/debug_8h_source.html @@ -0,0 +1,89 @@ + + + + + + + +Cutlass: debug.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
debug.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 
26 #pragma once
27 
33 #include <stdio.h>
34 
35 namespace cutlass {
36 
37 /******************************************************************************
38  * Debug and logging macros
39  ******************************************************************************/
40 
// CUDA_LOG(format, ...): printf-style logging to stdout. A definition supplied
// before this point takes precedence. At least one argument must follow the
// format string, because __VA_ARGS__ is pasted after a comma.
#ifndef CUDA_LOG
#ifdef __CUDA_ARCH__
// Device compilation: prefix every message with the block and thread indices.
#define CUDA_LOG(format, ...)                               \
  printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, \
         blockIdx.x,                                        \
         blockIdx.y,                                        \
         blockIdx.z,                                        \
         threadIdx.x,                                       \
         threadIdx.y,                                       \
         threadIdx.z,                                       \
         __VA_ARGS__);
#else
// Host compilation: forward straight to printf.
#define CUDA_LOG(format, ...) printf(format, __VA_ARGS__)
#endif
#endif
// CUDA_LOG_DEBUG(format, ...): forwards to CUDA_LOG when DEBUG is defined and
// expands to nothing otherwise, in which case its arguments are never
// evaluated. A definition supplied before this point takes precedence.
#ifndef CUDA_LOG_DEBUG
#if defined(DEBUG)
#define CUDA_LOG_DEBUG(format, ...) CUDA_LOG(format, __VA_ARGS__)
#else
#define CUDA_LOG_DEBUG(format, ...)
#endif
#endif
70 
77 __host__ CUTLASS_DEVICE cudaError_t cuda_perror_impl(cudaError_t error,
78  const char* filename,
79  int line) {
80  (void)filename;
81  (void)line;
82  if (error) {
83 #if !defined(__CUDA_ARCH__)
84  fprintf(
85  stderr, "CUDA error %d [%s, %d]: %s\n", error, filename, line, cudaGetErrorString(error));
86  fflush(stderr);
87 #else
88  printf("CUDA error %d [%s, %d]\n", error, filename, line);
89 #endif
90  }
91  return error;
92 }
93 
// CUDA_PERROR(e): passes `e` (cast to cudaError_t) to cuda_perror_impl along
// with the current file and line, and evaluates to the value it returns.
// A definition supplied before this point takes precedence.
#if !defined(CUDA_PERROR)
#define CUDA_PERROR(e) cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)
#endif
100 
// CUDA_PERROR_EXIT(e): reports `e` through cuda_perror_impl and terminates the
// process with exit(1) when the reported code is non-zero.
//
// The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement: `if (c) CUDA_PERROR_EXIT(e); else ...` compiles as written, and a
// following `else` can never bind to the macro's internal `if`.
// A definition supplied before this point takes precedence.
#ifndef CUDA_PERROR_EXIT
#define CUDA_PERROR_EXIT(e)                                       \
  do {                                                            \
    if (cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)) { \
      exit(1);                                                    \
    }                                                             \
  } while (0)
#endif
110 
// CUDA_PERROR_DEBUG(e): behaves like CUDA_PERROR(e) when DEBUG is defined;
// otherwise it evaluates to `(e)` without reporting anything.
// A definition supplied before this point takes precedence.
#if !defined(CUDA_PERROR_DEBUG)
#if defined(DEBUG)
#define CUDA_PERROR_DEBUG(e) CUDA_PERROR(e)
#else
#define CUDA_PERROR_DEBUG(e) (e)
#endif
#endif
121 
122 } // namespace cutlass
Definition: convert.h:33
+
__host__ CUTLASS_DEVICE cudaError_t cuda_perror_impl(cudaError_t error, const char *filename, int line)
The corresponding error message is printed to stderr (or stdout in device code) along with the suppli...
Definition: debug.h:77
+
+ + + + diff --git a/docs/generated-html/dgemm__traits_8h.html b/docs/generated-html/dgemm__traits_8h.html new file mode 100644 index 00000000..eebc2f36 --- /dev/null +++ b/docs/generated-html/dgemm__traits_8h.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: dgemm_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
dgemm_traits.h File Reference
+
+ + + + + diff --git a/docs/generated-html/dgemm__traits_8h_source.html b/docs/generated-html/dgemm__traits_8h_source.html new file mode 100644 index 00000000..9cf2c873 --- /dev/null +++ b/docs/generated-html/dgemm__traits_8h_source.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: dgemm_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
dgemm_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/gemm/gemm.h>
37 
38 namespace cutlass {
39 namespace gemm {
40 
42 
43 template <
45  typename OutputTile_,
47  typename AccumulatorsPerThread_,
49  int kScalarsPerLdgA_ = 1,
51  int kScalarsPerLdgB_ = 1>
53  : public GemmConfig<
55  double,
57  double,
59  double,
61  double,
63  OutputTile_,
65  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, double, double, double>,
67  kScalarsPerLdgA_,
69  kScalarsPerLdgA_,
71  2,
73  kScalarsPerLdgB_,
75  kScalarsPerLdgB_,
77  2,
79  1,
81  2,
83  1,
85  2> {};
86 
88 
89 template <
91  MatrixLayout::Kind kLayoutA_,
93  MatrixLayout::Kind kLayoutB_,
95  typename OutputTile_ = Shape<8, 64, 128>,
97  typename EpilogueFunctor_ = LinearScaling<double>,
99  typename AccumulatorsPerThread_ = Shape<8, 8, 8>,
101  int kScalarsPerLdgA_ = 1,
103  int kScalarsPerLdgB_ = 1,
105  typename Index_ = int,
107  typename GemmConfig_ =
110  typename GemmEpilogueTraits_ =
113  // The layout for A.
114  kLayoutA_,
115  // The layout for B.
116  kLayoutB_,
117  // The config.
118  GemmConfig_,
119  // The epilogue.
120  GemmEpilogue<GemmEpilogueTraits_>,
121  // The index.
122  Index_> {};
123 
125 
126 } // namespace gemm
127 } // namespace cutlass
Definition: convert.h:33
+
Defines iterators for efficiently loading and storing to global memory.
+
Defines structural properties of complete GEMM computation.
+
Template implementing matrix multiply-add operations on fragments.
+
Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
+
Defines iterators for efficiently loading and storing tiles to and from shared memory.
+
Definition: gemm_traits.h:79
+
Definition: dgemm_traits.h:112
+
Definition: dgemm_traits.h:52
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Definition: gemm_epilogue_traits.h:300
+
Kind
Definition: matrix_traits.h:36
+
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:40
+
Implements a software-pipelined efficient GEMM.
+
Defines structural properties of the GEMM epilogue.
+
Definition: gemm_traits.h:723
+
+ + + + diff --git a/docs/generated-html/dir_1417ee5ebebc309c36b7962f26a92c39.html b/docs/generated-html/dir_1417ee5ebebc309c36b7962f26a92c39.html new file mode 100644 index 00000000..d7393ef1 --- /dev/null +++ b/docs/generated-html/dir_1417ee5ebebc309c36b7962f26a92c39.html @@ -0,0 +1,155 @@ + + + + + + + +Cutlass: cutlass Directory Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass Directory Reference
+
+
+ + + + + + +

+Directories

directory  gemm
 
directory  util
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Files

file  convert.h [code]
 Defines conversion operations among Fragments of different base type.
 
file  coord.h [code]
 A Coord is a coordinate of arbitrary rank into a tensor or matrix.
 
file  core_io.h [code]
 Helpers for printing cutlass/core objects.
 
file  cutlass.h [code]
 Basic include for CUTLASS macros.
 
file  fragment.h [code]
 Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers.
 
file  fragment_load_store.h [code]
 Defines accessors for loading and storing fragments to memory efficiently.
 
file  fragment_multiply_add.h [code]
 Defines multiply-add operations on fragments within a thread.
 
file  iterator_access.h [code]
 Free functions for loading and storing to implementations of tile iterator concepts.
 
file  load_store.h [code]
 Defines abstractions for efficiently loading and storing vectors to memory.
 
file  matrix_traits.h [code]
 Defines properties of matrices used to denote layout and operands to GEMM kernels.
 
file  predicate_vector.h [code]
 Defines container classes and iterators for managing a statically sized vector of boolean predicates.
 
file  reshape_tile.h [code]
 Defines a type for restructuring a tile.
 
file  shape.h [code]
 Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
 
file  tensor_ref.h [code]
 Defines a structure containing strides, bounds, and a pointer to tensor data.
 
file  tensor_view.h [code]
 Defines a structure containing strides and a pointer to tensor data.
 
file  tile_iterator.h [code]
 Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
 
file  tile_traits_standard.h [code]
 Defines tile traits for several tile partitioning arrangements of threads expected to achieve efficient streaming performance.
 
file  vector.h [code]
 Defines a 1D vector of elements held in the registers of each thread.
 
file  wmma_matrix.h [code]
 Abstractions for loading and storing matrices using the CUDA WMMA API.
 
+
+ + + + diff --git a/docs/generated-html/dir_18d6a367a3982a494d65599933fc67a3.html b/docs/generated-html/dir_18d6a367a3982a494d65599933fc67a3.html new file mode 100644 index 00000000..16126747 --- /dev/null +++ b/docs/generated-html/dir_18d6a367a3982a494d65599933fc67a3.html @@ -0,0 +1,178 @@ + + + + + + + +Cutlass: gemm Directory Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm Directory Reference
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Files

file  clear_accumulators.h [code]
 Defines abstractions for efficiently clearing accumulator tiles.
 
file  dgemm_traits.h [code]
 Defines structural traits of double-precision GEMM.
 
file  gemm.h [code]
 Implements a software-pipelined efficient GEMM.
 
file  gemm_epilogue.h [code]
 Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product.
 
file  gemm_epilogue_traits.h [code]
 Defines structural properties of the GEMM epilogue.
 
file  gemm_global_stream.h [code]
 Implements efficient loading of the thread block-level tile from global memory and storing to shared memory.
 
file  gemm_global_tile.h [code]
 Defines iterators for efficiently loading and storing to global memory.
 
file  gemm_operand.h [code]
 Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory.
 
file  gemm_shared_stream.h [code]
 Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEMM pipeline.
 
file  gemm_shared_tile.h [code]
 Defines iterators for efficiently loading and storing tiles to and from shared memory.
 
file  gemm_traits.h [code]
 Defines structural properties of complete GEMM computation.
 
file  hgemm_global_tile.h [code]
 Tile traits used to construct global tile iterator for HGEMM. This is intended to partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate memory accesses larger than 16 bits.
 
file  hgemm_multiply_add.h [code]
 Specialization implementing multiply-add operation on half-precision floating point fragments.
 
file  hgemm_swizzle.h [code]
 Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for multiplicands.
 
file  hgemm_traits.h [code]
 Defines structural properties of half-precision GEMM computation.
 
file  identity_block_swizzle.h [code]
 Defines functors for mapping blockIdx to partitions of the GEMM computation.
 
file  igemm_epilogue.h [code]
 Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point output matrix formats.
 
file  igemm_global_tile.h [code]
 Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each. Applies permute transformation to construct 'interleaved K-strided' data layout in which 4-element dot products from the same K index are arranged in consecutive locations within shared memory.
 
file  igemm_multiply_add.h [code]
 Implements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction.
 
file  igemm_swizzle.h [code]
 Transposes a fragment of data containing packed 8-bit integer elements.
 
file  igemm_traits.h [code]
 Defines structural properties of mixed-precision integer GEMM. Multiplicands are assumed to be packed 8-bit integers, accumulators are assumed to be 32-bit signed integers, and output formats vary.
 
file  linear_scaling.h [code]
 Implements the BLAS linear scaling function alpha*AB + beta*C.
 
file  sgemm_traits.h [code]
 Defines structural properties of single-precision GEMM.
 
file  thread_multiply_add.h [code]
 Template implementing matrix multiply-add operations on fragments.
 
file  wmma_gemm_epilogue_traits.h [code]
 Defines structural properties of WMMA GEMM's epilogue phase.
 
file  wmma_gemm_global_tile.h [code]
 Defines tile iterator traits for loading thread block-level tile from global memory.
 
file  wmma_gemm_multiply_add.h [code]
 Implements warp-level matrix multiply-accumulate operation using CUDA WMMA API.
 
file  wmma_gemm_shared_tile.h [code]
 Defines iterator traits for efficiently loading and storing fragment to and from shared memory, specialized for WMMA GEMM.
 
file  wmma_gemm_traits.h [code]
 Defines structural properties of GEMM targeting WMMA API in CUDA.
 
+
+ + + + diff --git a/docs/generated-html/dir_c5917a9a879e9a6c73eaf5237444ab84.html b/docs/generated-html/dir_c5917a9a879e9a6c73eaf5237444ab84.html new file mode 100644 index 00000000..a66eb22f --- /dev/null +++ b/docs/generated-html/dir_c5917a9a879e9a6c73eaf5237444ab84.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: util Directory Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
util Directory Reference
+
+
+ + + + + + + + + + + +

+Files

file  cutlass_math.h [code]
 Math utilities.
 
file  debug.h [code]
 Debugging and logging functionality.
 
file  platform.h [code]
 C++ features that may be otherwise unimplemented for CUDA device functions.
 
+
+ + + + diff --git a/docs/generated-html/doc.png b/docs/generated-html/doc.png new file mode 100644 index 00000000..3cbc5d32 Binary files /dev/null and b/docs/generated-html/doc.png differ diff --git a/docs/generated-html/doxygen.css b/docs/generated-html/doxygen.css new file mode 100644 index 00000000..e8440f81 --- /dev/null +++ b/docs/generated-html/doxygen.css @@ -0,0 +1,1596 @@ +/* The standard CSS for doxygen 1.8.14 */ + +body, table, div, p, dl { + font: 400 14px/22px Roboto,sans-serif; +} + +p.reference, p.definition { + font: 400 14px/22px Roboto,sans-serif; +} + +/* @group Heading Levels */ + +h1.groupheader { + font-size: 150%; +} + +.title { + font: 400 14px/28px Roboto,sans-serif; + font-size: 150%; + font-weight: bold; + margin: 10px 2px; +} + +h2.groupheader { + border-bottom: 1px solid #B2CB87; + color: #617B35; + font-size: 150%; + font-weight: normal; + margin-top: 1.75em; + padding-top: 8px; + padding-bottom: 4px; + width: 100%; +} + +h3.groupheader { + font-size: 100%; +} + +h1, h2, h3, h4, h5, h6 { + -webkit-transition: text-shadow 0.5s linear; + -moz-transition: text-shadow 0.5s linear; + -ms-transition: text-shadow 0.5s linear; + -o-transition: text-shadow 0.5s linear; + transition: text-shadow 0.5s linear; + margin-right: 15px; +} + +h1.glow, h2.glow, h3.glow, h4.glow, h5.glow, h6.glow { + text-shadow: 0 0 15px cyan; +} + +dt { + font-weight: bold; +} + +div.multicol { + -moz-column-gap: 1em; + -webkit-column-gap: 1em; + -moz-column-count: 3; + -webkit-column-count: 3; +} + +p.startli, p.startdd { + margin-top: 2px; +} + +p.starttd { + margin-top: 0px; +} + +p.endli { + margin-bottom: 0px; +} + +p.enddd { + margin-bottom: 4px; +} + +p.endtd { + margin-bottom: 2px; +} + +/* @end */ + +caption { + font-weight: bold; +} + +span.legend { + font-size: 70%; + text-align: center; +} + +h3.version { + font-size: 90%; + text-align: center; +} + +div.qindex, div.navtab{ + background-color: #F2F6EB; + border: 1px solid #C4D7A3; + text-align: center; +} 
+ +div.qindex, div.navpath { + width: 100%; + line-height: 140%; +} + +div.navtab { + margin-right: 15px; +} + +/* @group Link Styling */ + +a { + color: #6F8C3D; + font-weight: normal; + text-decoration: none; +} + +.contents a:visited { + color: #80A246; +} + +a:hover { + text-decoration: underline; +} + +a.qindex { + font-weight: bold; +} + +a.qindexHL { + font-weight: bold; + background-color: #BFD49C; + color: #ffffff; + border: 1px double #B1CA86; +} + +.contents a.qindexHL:visited { + color: #ffffff; +} + +a.el { + font-weight: bold; +} + +a.elRef { +} + +a.code, a.code:visited, a.line, a.line:visited { + color: #4665A2; +} + +a.codeRef, a.codeRef:visited, a.lineRef, a.lineRef:visited { + color: #4665A2; +} + +/* @end */ + +dl.el { + margin-left: -1cm; +} + +pre.fragment { + border: 1px solid #C4CFE5; + background-color: #FBFCFD; + padding: 4px 6px; + margin: 4px 8px 4px 2px; + overflow: auto; + word-wrap: break-word; + font-size: 9pt; + line-height: 125%; + font-family: monospace, fixed; + font-size: 105%; +} + +div.fragment { + padding: 0px; + margin: 4px 8px 4px 2px; + background-color: #FCFDFB; + border: 1px solid #D9E5C4; +} + +div.line { + font-family: monospace, fixed; + font-size: 13px; + min-height: 13px; + line-height: 1.0; + text-wrap: unrestricted; + white-space: -moz-pre-wrap; /* Moz */ + white-space: -pre-wrap; /* Opera 4-6 */ + white-space: -o-pre-wrap; /* Opera 7 */ + white-space: pre-wrap; /* CSS3 */ + word-wrap: break-word; /* IE 5.5+ */ + text-indent: -53px; + padding-left: 53px; + padding-bottom: 0px; + margin: 0px; + -webkit-transition-property: background-color, box-shadow; + -webkit-transition-duration: 0.5s; + -moz-transition-property: background-color, box-shadow; + -moz-transition-duration: 0.5s; + -ms-transition-property: background-color, box-shadow; + -ms-transition-duration: 0.5s; + -o-transition-property: background-color, box-shadow; + -o-transition-duration: 0.5s; + transition-property: background-color, box-shadow; + 
transition-duration: 0.5s; +} + +div.line:after { + content:"\000A"; + white-space: pre; +} + +div.line.glow { + background-color: cyan; + box-shadow: 0 0 10px cyan; +} + + +span.lineno { + padding-right: 4px; + text-align: right; + border-right: 2px solid #0F0; + background-color: #E8E8E8; + white-space: pre; +} +span.lineno a { + background-color: #D8D8D8; +} + +span.lineno a:hover { + background-color: #C8C8C8; +} + +.lineno { + -webkit-touch-callout: none; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; +} + +div.ah, span.ah { + background-color: black; + font-weight: bold; + color: #ffffff; + margin-bottom: 3px; + margin-top: 3px; + padding: 0.2em; + border: solid thin #333; + border-radius: 0.5em; + -webkit-border-radius: .5em; + -moz-border-radius: .5em; + box-shadow: 2px 2px 3px #999; + -webkit-box-shadow: 2px 2px 3px #999; + -moz-box-shadow: rgba(0, 0, 0, 0.15) 2px 2px 2px; + background-image: -webkit-gradient(linear, left top, left bottom, from(#eee), to(#000),color-stop(0.3, #444)); + background-image: -moz-linear-gradient(center top, #eee 0%, #444 40%, #000 110%); +} + +div.classindex ul { + list-style: none; + padding-left: 0; +} + +div.classindex span.ai { + display: inline-block; +} + +div.groupHeader { + margin-left: 16px; + margin-top: 12px; + font-weight: bold; +} + +div.groupText { + margin-left: 16px; + font-style: italic; +} + +body { + background-color: white; + color: black; + margin: 0; +} + +div.contents { + margin-top: 10px; + margin-left: 12px; + margin-right: 8px; +} + +td.indexkey { + background-color: #F2F6EB; + font-weight: bold; + border: 1px solid #D9E5C4; + margin: 2px 0px 2px 0; + padding: 2px 10px; + white-space: nowrap; + vertical-align: top; +} + +td.indexvalue { + background-color: #F2F6EB; + border: 1px solid #D9E5C4; + padding: 2px 10px; + margin: 2px 0px; +} + +tr.memlist { + background-color: #F4F7EE; +} + +p.formulaDsp { + text-align: center; 
+} + +img.formulaDsp { + +} + +img.formulaInl { + vertical-align: middle; +} + +div.center { + text-align: center; + margin-top: 0px; + margin-bottom: 0px; + padding: 0px; +} + +div.center img { + border: 0px; +} + +address.footer { + text-align: right; + padding-right: 12px; +} + +img.footer { + border: 0px; + vertical-align: middle; +} + +/* @group Code Colorization */ + +span.keyword { + color: #008000 +} + +span.keywordtype { + color: #604020 +} + +span.keywordflow { + color: #e08000 +} + +span.comment { + color: #800000 +} + +span.preprocessor { + color: #806020 +} + +span.stringliteral { + color: #002080 +} + +span.charliteral { + color: #008080 +} + +span.vhdldigit { + color: #ff00ff +} + +span.vhdlchar { + color: #000000 +} + +span.vhdlkeyword { + color: #700070 +} + +span.vhdllogic { + color: #ff0000 +} + +blockquote { + background-color: #F9FBF7; + border-left: 2px solid #BFD49C; + margin: 0 24px 0 4px; + padding: 0 12px 0 16px; +} + +/* @end */ + +/* +.search { + color: #003399; + font-weight: bold; +} + +form.search { + margin-bottom: 0px; + margin-top: 0px; +} + +input.search { + font-size: 75%; + color: #000080; + font-weight: normal; + background-color: #e8eef2; +} +*/ + +td.tiny { + font-size: 75%; +} + +.dirtab { + padding: 4px; + border-collapse: collapse; + border: 1px solid #C4D7A3; +} + +th.dirtab { + background: #F2F6EB; + font-weight: bold; +} + +hr { + height: 0px; + border: none; + border-top: 1px solid #87AA4A; +} + +hr.footer { + height: 1px; +} + +/* @group Member Descriptions */ + +table.memberdecls { + border-spacing: 0px; + padding: 0px; +} + +.memberdecls td, .fieldtable tr { + -webkit-transition-property: background-color, box-shadow; + -webkit-transition-duration: 0.5s; + -moz-transition-property: background-color, box-shadow; + -moz-transition-duration: 0.5s; + -ms-transition-property: background-color, box-shadow; + -ms-transition-duration: 0.5s; + -o-transition-property: background-color, box-shadow; + -o-transition-duration: 
0.5s; + transition-property: background-color, box-shadow; + transition-duration: 0.5s; +} + +.memberdecls td.glow, .fieldtable tr.glow { + background-color: cyan; + box-shadow: 0 0 15px cyan; +} + +.mdescLeft, .mdescRight, +.memItemLeft, .memItemRight, +.memTemplItemLeft, .memTemplItemRight, .memTemplParams { + background-color: #FBFCF9; + border: none; + margin: 4px; + padding: 1px 0 0 8px; +} + +.mdescLeft, .mdescRight { + padding: 0px 8px 4px 8px; + color: #555; +} + +.memSeparator { + border-bottom: 1px solid #DEE4F0; + line-height: 1px; + margin: 0px; + padding: 0px; +} + +.memItemLeft, .memTemplItemLeft { + white-space: nowrap; +} + +.memItemRight { + width: 100%; +} + +.memTemplParams { + color: #80A246; + white-space: nowrap; + font-size: 80%; +} + +/* @end */ + +/* @group Member Details */ + +/* Styles for detailed member documentation */ + +.memtitle { + padding: 8px; + border-top: 1px solid #C7D9A8; + border-left: 1px solid #C7D9A8; + border-right: 1px solid #C7D9A8; + border-top-right-radius: 4px; + border-top-left-radius: 4px; + margin-bottom: -1px; + background-image: url('nav_f.png'); + background-repeat: repeat-x; + background-color: #ECF2E2; + line-height: 1.25; + font-weight: 300; + float:left; +} + +.permalink +{ + font-size: 65%; + display: inline-block; + vertical-align: middle; +} + +.memtemplate { + font-size: 80%; + color: #80A246; + font-weight: normal; + margin-left: 9px; +} + +.memnav { + background-color: #F2F6EB; + border: 1px solid #C4D7A3; + text-align: center; + margin: 2px; + margin-right: 15px; + padding: 2px; +} + +.mempage { + width: 100%; +} + +.memitem { + padding: 0; + margin-bottom: 10px; + margin-right: 5px; + -webkit-transition: box-shadow 0.5s linear; + -moz-transition: box-shadow 0.5s linear; + -ms-transition: box-shadow 0.5s linear; + -o-transition: box-shadow 0.5s linear; + transition: box-shadow 0.5s linear; + display: table !important; + width: 100%; +} + +.memitem.glow { + box-shadow: 0 0 15px cyan; +} + +.memname { 
+ font-weight: 400; + margin-left: 6px; +} + +.memname td { + vertical-align: bottom; +} + +.memproto, dl.reflist dt { + border-top: 1px solid #C7D9A8; + border-left: 1px solid #C7D9A8; + border-right: 1px solid #C7D9A8; + padding: 6px 0px 6px 0px; + color: #435525; + font-weight: bold; + text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9); + background-color: #EAF1DF; + /* opera specific markup */ + box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + border-top-right-radius: 4px; + /* firefox specific markup */ + -moz-box-shadow: rgba(0, 0, 0, 0.15) 5px 5px 5px; + -moz-border-radius-topright: 4px; + /* webkit specific markup */ + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + -webkit-border-top-right-radius: 4px; + +} + +.overload { + font-family: "courier new",courier,monospace; + font-size: 65%; +} + +.memdoc, dl.reflist dd { + border-bottom: 1px solid #C7D9A8; + border-left: 1px solid #C7D9A8; + border-right: 1px solid #C7D9A8; + padding: 6px 10px 2px 10px; + background-color: #FCFDFB; + border-top-width: 0; + background-image:url('nav_g.png'); + background-repeat:repeat-x; + background-color: #FFFFFF; + /* opera specific markup */ + border-bottom-left-radius: 4px; + border-bottom-right-radius: 4px; + box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + /* firefox specific markup */ + -moz-border-radius-bottomleft: 4px; + -moz-border-radius-bottomright: 4px; + -moz-box-shadow: rgba(0, 0, 0, 0.15) 5px 5px 5px; + /* webkit specific markup */ + -webkit-border-bottom-left-radius: 4px; + -webkit-border-bottom-right-radius: 4px; + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); +} + +dl.reflist dt { + padding: 5px; +} + +dl.reflist dd { + margin: 0px 0px 10px 0px; + padding: 5px; +} + +.paramkey { + text-align: right; +} + +.paramtype { + white-space: nowrap; +} + +.paramname { + color: #602020; + white-space: nowrap; +} +.paramname em { + font-style: normal; +} +.paramname code { + line-height: 14px; +} + +.params, .retval, .exception, .tparams { + margin-left: 0px; 
+ padding-left: 0px; +} + +.params .paramname, .retval .paramname { + font-weight: bold; + vertical-align: top; +} + +.params .paramtype { + font-style: italic; + vertical-align: top; +} + +.params .paramdir { + font-family: "courier new",courier,monospace; + vertical-align: top; +} + +table.mlabels { + border-spacing: 0px; +} + +td.mlabels-left { + width: 100%; + padding: 0px; +} + +td.mlabels-right { + vertical-align: bottom; + padding: 0px; + white-space: nowrap; +} + +span.mlabels { + margin-left: 8px; +} + +span.mlabel { + background-color: #A4C172; + border-top:1px solid #90B453; + border-left:1px solid #90B453; + border-right:1px solid #D9E5C4; + border-bottom:1px solid #D9E5C4; + text-shadow: none; + color: white; + margin-right: 4px; + padding: 2px 3px; + border-radius: 3px; + font-size: 7pt; + white-space: nowrap; + vertical-align: middle; +} + + + +/* @end */ + +/* these are for tree view inside a (index) page */ + +div.directory { + margin: 10px 0px; + border-top: 1px solid #BFD49C; + border-bottom: 1px solid #BFD49C; + width: 100%; +} + +.directory table { + border-collapse:collapse; +} + +.directory td { + margin: 0px; + padding: 0px; + vertical-align: top; +} + +.directory td.entry { + white-space: nowrap; + padding-right: 6px; + padding-top: 3px; +} + +.directory td.entry a { + outline:none; +} + +.directory td.entry a img { + border: none; +} + +.directory td.desc { + width: 100%; + padding-left: 6px; + padding-right: 6px; + padding-top: 3px; + border-left: 1px solid rgba(0,0,0,0.05); +} + +.directory tr.even { + padding-left: 6px; + background-color: #F9FBF7; +} + +.directory img { + vertical-align: -30%; +} + +.directory .levels { + white-space: nowrap; + width: 100%; + text-align: right; + font-size: 9pt; +} + +.directory .levels span { + cursor: pointer; + padding-left: 2px; + padding-right: 2px; + color: #6F8C3D; +} + +.arrow { + color: #BFD49C; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + 
-ms-user-select: none; + user-select: none; + cursor: pointer; + font-size: 80%; + display: inline-block; + width: 16px; + height: 22px; +} + +.icon { + font-family: Arial, Helvetica; + font-weight: bold; + font-size: 12px; + height: 14px; + width: 16px; + display: inline-block; + background-color: #A4C172; + color: white; + text-align: center; + border-radius: 4px; + margin-left: 2px; + margin-right: 2px; +} + +.icona { + width: 24px; + height: 22px; + display: inline-block; +} + +.iconfopen { + width: 24px; + height: 18px; + margin-bottom: 4px; + background-image:url('folderopen.png'); + background-position: 0px -4px; + background-repeat: repeat-y; + vertical-align:top; + display: inline-block; +} + +.iconfclosed { + width: 24px; + height: 18px; + margin-bottom: 4px; + background-image:url('folderclosed.png'); + background-position: 0px -4px; + background-repeat: repeat-y; + vertical-align:top; + display: inline-block; +} + +.icondoc { + width: 24px; + height: 18px; + margin-bottom: 4px; + background-image:url('doc.png'); + background-position: 0px -4px; + background-repeat: repeat-y; + vertical-align:top; + display: inline-block; +} + +table.directory { + font: 400 14px Roboto,sans-serif; +} + +/* @end */ + +div.dynheader { + margin-top: 8px; + -webkit-touch-callout: none; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; +} + +address { + font-style: normal; + color: #4D612A; +} + +table.doxtable caption { + caption-side: top; +} + +table.doxtable { + border-collapse:collapse; + margin-top: 4px; + margin-bottom: 4px; +} + +table.doxtable td, table.doxtable th { + border: 1px solid #52682D; + padding: 3px 7px 2px; +} + +table.doxtable th { + background-color: #657F37; + color: #FFFFFF; + font-size: 110%; + padding-bottom: 4px; + padding-top: 5px; +} + +table.fieldtable { + /*width: 100%;*/ + margin-bottom: 10px; + border: 1px solid #C7D9A8; + border-spacing: 0px; + -moz-border-radius: 
4px; + -webkit-border-radius: 4px; + border-radius: 4px; + -moz-box-shadow: rgba(0, 0, 0, 0.15) 2px 2px 2px; + -webkit-box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.15); + box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.15); +} + +.fieldtable td, .fieldtable th { + padding: 3px 7px 2px; +} + +.fieldtable td.fieldtype, .fieldtable td.fieldname { + white-space: nowrap; + border-right: 1px solid #C7D9A8; + border-bottom: 1px solid #C7D9A8; + vertical-align: top; +} + +.fieldtable td.fieldname { + padding-top: 3px; +} + +.fieldtable td.fielddoc { + border-bottom: 1px solid #C7D9A8; + /*width: 100%;*/ +} + +.fieldtable td.fielddoc p:first-child { + margin-top: 0px; +} + +.fieldtable td.fielddoc p:last-child { + margin-bottom: 2px; +} + +.fieldtable tr:last-child td { + border-bottom: none; +} + +.fieldtable th { + background-image:url('nav_f.png'); + background-repeat:repeat-x; + background-color: #ECF2E2; + font-size: 90%; + color: #435525; + padding-bottom: 4px; + padding-top: 5px; + text-align:left; + font-weight: 400; + -moz-border-radius-topleft: 4px; + -moz-border-radius-topright: 4px; + -webkit-border-top-left-radius: 4px; + -webkit-border-top-right-radius: 4px; + border-top-left-radius: 4px; + border-top-right-radius: 4px; + border-bottom: 1px solid #C7D9A8; +} + + +.tabsearch { + top: 0px; + left: 10px; + height: 36px; + background-image: url('tab_b.png'); + z-index: 101; + overflow: hidden; + font-size: 13px; +} + +.navpath ul +{ + font-size: 11px; + background-image:url('tab_b.png'); + background-repeat:repeat-x; + background-position: 0 -5px; + height:30px; + line-height:30px; + color:#B3CC8A; + border:solid 1px #D8E4C2; + overflow:hidden; + margin:0px; + padding:0px; +} + +.navpath li +{ + list-style-type:none; + float:left; + padding-left:10px; + padding-right:15px; + background-image:url('bc_s.png'); + background-repeat:no-repeat; + background-position:right; + color:#627C36; +} + +.navpath li.navelem a +{ + height:32px; + display:block; + text-decoration: none; + 
outline: none; + color: #4A5D28; + font-family: 'Lucida Grande',Geneva,Helvetica,Arial,sans-serif; + text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9); + text-decoration: none; +} + +.navpath li.navelem a:hover +{ + color:#9EBD68; +} + +.navpath li.footer +{ + list-style-type:none; + float:right; + padding-left:10px; + padding-right:15px; + background-image:none; + background-repeat:no-repeat; + background-position:right; + color:#627C36; + font-size: 8pt; +} + + +div.summary +{ + float: right; + font-size: 8pt; + padding-right: 5px; + width: 50%; + text-align: right; +} + +div.summary a +{ + white-space: nowrap; +} + +table.classindex +{ + margin: 10px; + white-space: nowrap; + margin-left: 3%; + margin-right: 3%; + width: 94%; + border: 0; + border-spacing: 0; + padding: 0; +} + +div.ingroups +{ + font-size: 8pt; + width: 50%; + text-align: left; +} + +div.ingroups a +{ + white-space: nowrap; +} + +div.header +{ + background-image:url('nav_h.png'); + background-repeat:repeat-x; + background-color: #FBFCF9; + margin: 0px; + border-bottom: 1px solid #D9E5C4; +} + +div.headertitle +{ + padding: 5px 5px 5px 10px; +} + +dl +{ + padding: 0 0 0 10px; +} + +/* dl.note, dl.warning, dl.attention, dl.pre, dl.post, dl.invariant, dl.deprecated, dl.todo, dl.test, dl.bug */ +dl.section +{ + margin-left: 0px; + padding-left: 0px; +} + +dl.note +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #D0C000; +} + +dl.warning, dl.attention +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #FF0000; +} + +dl.pre, dl.post, dl.invariant +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #00D000; +} + +dl.deprecated +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #505050; +} + +dl.todo +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #00C0E0; +} + +dl.test +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px 
solid; + border-color: #3030E0; +} + +dl.bug +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #C08050; +} + +dl.section dd { + margin-bottom: 6px; +} + + +#projectlogo +{ + text-align: center; + vertical-align: bottom; + border-collapse: separate; +} + +#projectlogo img +{ + border: 0px none; +} + +#projectalign +{ + vertical-align: middle; +} + +#projectname +{ + font: 300% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 2px 0px; +} + +#projectbrief +{ + font: 120% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 0px; +} + +#projectnumber +{ + font: 50% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 0px; +} + +#titlearea +{ + padding: 0px; + margin: 0px; + width: 100%; + border-bottom: 1px solid #90B453; +} + +.image +{ + text-align: center; +} + +.dotgraph +{ + text-align: center; +} + +.mscgraph +{ + text-align: center; +} + +.plantumlgraph +{ + text-align: center; +} + +.diagraph +{ + text-align: center; +} + +.caption +{ + font-weight: bold; +} + +div.zoom +{ + border: 1px solid #B7CE90; +} + +dl.citelist { + margin-bottom:50px; +} + +dl.citelist dt { + color:#5D7533; + float:left; + font-weight:bold; + margin-right:10px; + padding:5px; +} + +dl.citelist dd { + margin:2px 0; + padding:5px 0; +} + +div.toc { + padding: 14px 25px; + background-color: #F8FAF4; + border: 1px solid #E6EED8; + border-radius: 7px 7px 7px 7px; + float: right; + height: auto; + margin: 0 8px 10px 10px; + width: 200px; +} + +div.toc li { + background: url("bdwn.png") no-repeat scroll 0 5px transparent; + font: 10px/1.2 Verdana,DejaVu Sans,Geneva,sans-serif; + margin-top: 5px; + padding-left: 10px; + padding-top: 2px; +} + +div.toc h3 { + font: bold 12px/1.2 Arial,FreeSans,sans-serif; + color: #80A246; + border-bottom: 0 none; + margin: 0; +} + +div.toc ul { + list-style: none outside none; + border: medium none; + padding: 0px; +} + +div.toc li.level1 { + margin-left: 0px; +} + +div.toc li.level2 { + margin-left: 15px; +} + +div.toc 
li.level3 { + margin-left: 30px; +} + +div.toc li.level4 { + margin-left: 45px; +} + +.inherit_header { + font-weight: bold; + color: gray; + cursor: pointer; + -webkit-touch-callout: none; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; +} + +.inherit_header td { + padding: 6px 0px 2px 5px; +} + +.inherit { + display: none; +} + +tr.heading h2 { + margin-top: 12px; + margin-bottom: 4px; +} + +/* tooltip related style info */ + +.ttc { + position: absolute; + display: none; +} + +#powerTip { + cursor: default; + white-space: nowrap; + background-color: white; + border: 1px solid gray; + border-radius: 4px 4px 4px 4px; + box-shadow: 1px 1px 7px gray; + display: none; + font-size: smaller; + max-width: 80%; + opacity: 0.9; + padding: 1ex 1em 1em; + position: absolute; + z-index: 2147483647; +} + +#powerTip div.ttdoc { + color: grey; + font-style: italic; +} + +#powerTip div.ttname a { + font-weight: bold; +} + +#powerTip div.ttname { + font-weight: bold; +} + +#powerTip div.ttdeci { + color: #006318; +} + +#powerTip div { + margin: 0px; + padding: 0px; + font: 12px/16px Roboto,sans-serif; +} + +#powerTip:before, #powerTip:after { + content: ""; + position: absolute; + margin: 0px; +} + +#powerTip.n:after, #powerTip.n:before, +#powerTip.s:after, #powerTip.s:before, +#powerTip.w:after, #powerTip.w:before, +#powerTip.e:after, #powerTip.e:before, +#powerTip.ne:after, #powerTip.ne:before, +#powerTip.se:after, #powerTip.se:before, +#powerTip.nw:after, #powerTip.nw:before, +#powerTip.sw:after, #powerTip.sw:before { + border: solid transparent; + content: " "; + height: 0; + width: 0; + position: absolute; +} + +#powerTip.n:after, #powerTip.s:after, +#powerTip.w:after, #powerTip.e:after, +#powerTip.nw:after, #powerTip.ne:after, +#powerTip.sw:after, #powerTip.se:after { + border-color: rgba(255, 255, 255, 0); +} + +#powerTip.n:before, #powerTip.s:before, +#powerTip.w:before, #powerTip.e:before, 
+#powerTip.nw:before, #powerTip.ne:before, +#powerTip.sw:before, #powerTip.se:before { + border-color: rgba(128, 128, 128, 0); +} + +#powerTip.n:after, #powerTip.n:before, +#powerTip.ne:after, #powerTip.ne:before, +#powerTip.nw:after, #powerTip.nw:before { + top: 100%; +} + +#powerTip.n:after, #powerTip.ne:after, #powerTip.nw:after { + border-top-color: #ffffff; + border-width: 10px; + margin: 0px -10px; +} +#powerTip.n:before { + border-top-color: #808080; + border-width: 11px; + margin: 0px -11px; +} +#powerTip.n:after, #powerTip.n:before { + left: 50%; +} + +#powerTip.nw:after, #powerTip.nw:before { + right: 14px; +} + +#powerTip.ne:after, #powerTip.ne:before { + left: 14px; +} + +#powerTip.s:after, #powerTip.s:before, +#powerTip.se:after, #powerTip.se:before, +#powerTip.sw:after, #powerTip.sw:before { + bottom: 100%; +} + +#powerTip.s:after, #powerTip.se:after, #powerTip.sw:after { + border-bottom-color: #ffffff; + border-width: 10px; + margin: 0px -10px; +} + +#powerTip.s:before, #powerTip.se:before, #powerTip.sw:before { + border-bottom-color: #808080; + border-width: 11px; + margin: 0px -11px; +} + +#powerTip.s:after, #powerTip.s:before { + left: 50%; +} + +#powerTip.sw:after, #powerTip.sw:before { + right: 14px; +} + +#powerTip.se:after, #powerTip.se:before { + left: 14px; +} + +#powerTip.e:after, #powerTip.e:before { + left: 100%; +} +#powerTip.e:after { + border-left-color: #ffffff; + border-width: 10px; + top: 50%; + margin-top: -10px; +} +#powerTip.e:before { + border-left-color: #808080; + border-width: 11px; + top: 50%; + margin-top: -11px; +} + +#powerTip.w:after, #powerTip.w:before { + right: 100%; +} +#powerTip.w:after { + border-right-color: #ffffff; + border-width: 10px; + top: 50%; + margin-top: -10px; +} +#powerTip.w:before { + border-right-color: #808080; + border-width: 11px; + top: 50%; + margin-top: -11px; +} + +@media print +{ + #top { display: none; } + #side-nav { display: none; } + #nav-path { display: none; } + body { overflow:visible; 
} + h1, h2, h3, h4, h5, h6 { page-break-after: avoid; } + .summary { display: none; } + .memitem { page-break-inside: avoid; } + #doc-content + { + margin-left:0 !important; + height:auto !important; + width:auto !important; + overflow:inherit; + display:inline; + } +} + +/* @group Markdown */ + +/* +table.markdownTable { + border-collapse:collapse; + margin-top: 4px; + margin-bottom: 4px; +} + +table.markdownTable td, table.markdownTable th { + border: 1px solid #52682D; + padding: 3px 7px 2px; +} + +table.markdownTableHead tr { +} + +table.markdownTableBodyLeft td, table.markdownTable th { + border: 1px solid #52682D; + padding: 3px 7px 2px; +} + +th.markdownTableHeadLeft th.markdownTableHeadRight th.markdownTableHeadCenter th.markdownTableHeadNone { + background-color: #657F37; + color: #FFFFFF; + font-size: 110%; + padding-bottom: 4px; + padding-top: 5px; +} + +th.markdownTableHeadLeft { + text-align: left +} + +th.markdownTableHeadRight { + text-align: right +} + +th.markdownTableHeadCenter { + text-align: center +} +*/ + +table.markdownTable { + border-collapse:collapse; + margin-top: 4px; + margin-bottom: 4px; +} + +table.markdownTable td, table.markdownTable th { + border: 1px solid #52682D; + padding: 3px 7px 2px; +} + +table.markdownTable tr { +} + +th.markdownTableHeadLeft, th.markdownTableHeadRight, th.markdownTableHeadCenter, th.markdownTableHeadNone { + background-color: #374F7F; + color: #FFFFFF; + font-size: 110%; + padding-bottom: 4px; + padding-top: 5px; +} + +th.markdownTableHeadLeft, td.markdownTableBodyLeft { + text-align: left +} + +th.markdownTableHeadRight, td.markdownTableBodyRight { + text-align: right +} + +th.markdownTableHeadCenter, td.markdownTableBodyCenter { + text-align: center +} + + +/* @end */ diff --git a/docs/generated-html/doxygen.png b/docs/generated-html/doxygen.png new file mode 100644 index 00000000..64802be6 Binary files /dev/null and b/docs/generated-html/doxygen.png differ diff --git a/docs/generated-html/dynsections.js 
b/docs/generated-html/dynsections.js new file mode 100644 index 00000000..c1ce1226 --- /dev/null +++ b/docs/generated-html/dynsections.js @@ -0,0 +1,120 @@ +/* + @licstart The following is the entire license notice for the + JavaScript code in this file. + + Copyright (C) 1997-2017 by Dimitri van Heesch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + @licend The above is the entire license notice + for the JavaScript code in this file + */ +function toggleVisibility(linkObj) +{ + var base = $(linkObj).attr('id'); + var summary = $('#'+base+'-summary'); + var content = $('#'+base+'-content'); + var trigger = $('#'+base+'-trigger'); + var src=$(trigger).attr('src'); + if (content.is(':visible')===true) { + content.hide(); + summary.show(); + $(linkObj).addClass('closed').removeClass('opened'); + $(trigger).attr('src',src.substring(0,src.length-8)+'closed.png'); + } else { + content.show(); + summary.hide(); + $(linkObj).removeClass('closed').addClass('opened'); + $(trigger).attr('src',src.substring(0,src.length-10)+'open.png'); + } + return false; +} + +function updateStripes() +{ + $('table.directory tr'). 
+ removeClass('even').filter(':visible:even').addClass('even'); +} + +function toggleLevel(level) +{ + $('table.directory tr').each(function() { + var l = this.id.split('_').length-1; + var i = $('#img'+this.id.substring(3)); + var a = $('#arr'+this.id.substring(3)); + if (l + + + + + + +Cutlass: File List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
File List
+
+
+
Here is a list of all files with brief descriptions:
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 clear_accumulators.hDefines abstractions for efficiently clearing accumulator tiles
 convert.hDefines conversion operations among Fragments of different base type
 coord.hA Coord is a coordinate of arbitrary rank into a tensor or matrix
 core_io.hHelpers for printing cutlass/core objects
 cutlass.hBasic include for CUTLASS macros
 cutlass_math.hMath utilities
 debug.hDebugging and logging functionality
 dgemm_traits.hDefines structural traits of double-precision GEMM
 fragment.hDefines Fragment, a statically-sized array for storing parts of matrices within a thread's registers
 fragment_load_store.hDefines accessors for loading and storing fragments to memory efficiently
 fragment_multiply_add.hDefines multiply-add operations on fragments within a thread
 gemm.hImplements a software-pipelined efficient GEMM
 gemm_epilogue.hImplements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product
 gemm_epilogue_traits.hDefines structural properties of the GEMM epilogue
 gemm_global_stream.hImplements efficient loading of the thread block-level tile from global memory and storing to shared memory
 gemm_global_tile.hDefines iterators for efficiently loading and storing to global memory
 gemm_operand.hDefines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory
 gemm_shared_stream.hDefines abstractions for managing loading and storing fragments to shared memory in the efficient GEMM pipeline
 gemm_shared_tile.hDefines iterators for efficiently loading and storing tiles to and from shared memory
 gemm_traits.hDefines structural properties of complete GEMM computation
 hgemm_global_tile.hTile traits used to construct global tile iterator for HGEMM. This is intended to partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate memory accesses larger than 16 bits
 hgemm_multiply_add.hSpecialization implementing multiply-add operation on half-precision floating point fragments
 hgemm_swizzle.hTransposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for multiplicands
 hgemm_traits.hDefines structural properties of half-precision GEMM computation
 identity_block_swizzle.hDefines functors for mapping blockIdx to partitions of the GEMM computation
 igemm_epilogue.hDefines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point output matrix formats
 igemm_global_tile.hImplements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each. Applies permute transformation to construct 'interleaved K-strided' data layout in which 4-element dot products from the same K index are arranged in consecutive locations within shared memory
 igemm_multiply_add.hImplements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction
 igemm_swizzle.hTransposes a fragment of data containing packed 8-bit integer elements
 igemm_traits.hDefines structural properties of mixed-precision integer GEMM. Multiplicands are assumed to be packed 8-bit integers, accumulators are assumed to be 32-bit signed integers, and output formats vary
 iterator_access.hFree functions for loading and storing to implementations of tile iterator concepts
 linear_scaling.hImplements the BLAS linear scaling function alpha*AB + beta*C
 load_store.hDefines abstractions for efficiently loading and storing vectors to memory
 matrix_traits.hDefines properties of matrices used to denote layout and operands to GEMM kernels
 platform.hC++ features that may be otherwise unimplemented for CUDA device functions
 predicate_vector.hDefines container classes and iterators for managing a statically sized vector of boolean predicates
 reshape_tile.hDefines a type for restructuring a tile
 sgemm_traits.hDefines structural properties of single-precision GEMM
 shape.hDefines Shape implementing the Layout concept for representing a 4D hypercube of objects
 tensor_ref.hDefines a structure containing strides, bounds, and a pointer to tensor data
 tensor_view.hDefines a structure containing strides and a pointer to tensor data
 thread_multiply_add.hTemplate implementing matrix multiply-add operations on fragments
 tile_iterator.hDefines the Tile Traits concept and iterators for loading and storing to tiles efficiently
 tile_traits_standard.hDefines tile traits for several tile partitioning arrangements of threads expected to achieve efficient streaming performance
 vector.hDefines a 1D vector of elements held in the registers of each thread
 wmma_gemm_epilogue_traits.hDefines structural properties of WMMA GEMM's epilogue phase
 wmma_gemm_global_tile.hDefines tile iterator traits for loading thread block-level tile from global memory
 wmma_gemm_multiply_add.hImplements warp-level matrix multiply-accumulate operation using CUDA WMMA API
 wmma_gemm_shared_tile.hDefines iterator traits for efficiently loading and storing fragment to and from shared memory, specialized for WMMA GEMM
 wmma_gemm_traits.hDefines structural properties of GEMM targeting WMMA API in CUDA
 wmma_matrix.hAbstractions for loading and storing matrices using the CUDA WMMA API
+
+
+ + + + diff --git a/docs/generated-html/folderclosed.png b/docs/generated-html/folderclosed.png new file mode 100644 index 00000000..7a18333d Binary files /dev/null and b/docs/generated-html/folderclosed.png differ diff --git a/docs/generated-html/folderopen.png b/docs/generated-html/folderopen.png new file mode 100644 index 00000000..fcbdb42d Binary files /dev/null and b/docs/generated-html/folderopen.png differ diff --git a/docs/generated-html/fragment_8h.html b/docs/generated-html/fragment_8h.html new file mode 100644 index 00000000..d97ac7b5 --- /dev/null +++ b/docs/generated-html/fragment_8h.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: fragment.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
fragment.h File Reference
+
+
+ +

Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers. +More...

+
#include <assert.h>
+#include <cutlass/shape.h>
+#include <cutlass/util/cutlass_math.h>
+#include <cutlass/vector.h>
+
+

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::StorageType< kAlignment_ >
 
struct  cutlass::StorageType< 4 >
 
struct  cutlass::StorageType< 2 >
 
struct  cutlass::StorageType< 1 >
 
struct  cutlass::Fragment< Element_, kElements_, kAlignment_ >
 A template defining Fragment Concept. More...
 
struct  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
 A template defining Fragment Iterator Concept. More...
 
struct  cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
 
+ + + +

+Namespaces

 cutlass
 
+
+ + + + diff --git a/docs/generated-html/fragment_8h_source.html b/docs/generated-html/fragment_8h_source.html new file mode 100644 index 00000000..8006bbbd --- /dev/null +++ b/docs/generated-html/fragment_8h_source.html @@ -0,0 +1,141 @@ + + + + + + + +Cutlass: fragment.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
fragment.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <assert.h>
32 #include <cutlass/shape.h>
34 #include <cutlass/vector.h>
35 
36 namespace cutlass {
37 
39 
56 
73 
75 template <int kAlignment_>
76 struct StorageType {
77  typedef uint64_t Type;
78 };
79 template <>
80 struct StorageType<4> {
81  typedef uint32_t Type;
82 };
83 template <>
84 struct StorageType<2> {
85  typedef uint16_t Type;
86 };
87 template <>
88 struct StorageType<1> {
89  typedef uint8_t Type;
90 };
91 
93 
98 template <typename Element_, int kElements_, size_t kAlignment_ = 16>
99 struct Fragment : public AlignedStruct<kAlignment_> {
101  static_assert(kAlignment_ == 16 || kAlignment_ >= sizeof(Element_), "Alignment is too small");
103  static_assert(is_pow2<kAlignment_>::value, "Alignment must be a power of two");
104 
108  typedef Element_ Element;
110  static int const kElements = kElements_;
111 
113  CUTLASS_DEVICE void clear() {
114  // Avoid element-wise access for sub 32b element type
115  if (kAlignment_ >= 8 && (kElements * sizeof(Element)) % 8 == 0) {
116  uint64_t* ptr = reinterpret_cast<uint64_t*>(storage);
117  for (int i = 0; i < (kElements * sizeof(Element)) / 8; ++i) {
118  ptr[i] = uint64_t(0);
119  }
120  } else if (kAlignment_ >= 4 && (kElements * sizeof(Element)) % 4 == 0) {
121  uint32_t* ptr = reinterpret_cast<uint32_t*>(storage);
122  for (int i = 0; i < (kElements * sizeof(Element)) / 4; ++i) {
123  ptr[i] = uint32_t(0);
124  }
125  } else if (kAlignment_ >= 2 && (kElements * sizeof(Element)) % 2 == 0) {
126  uint16_t* ptr = reinterpret_cast<uint16_t*>(storage);
127  for (int i = 0; i < (kElements * sizeof(Element)) / 2; ++i) {
128  ptr[i] = uint16_t(0);
129  }
130  } else {
131  for (int i = 0; i < kElements; ++i) {
132  storage[i] = 0;
133  }
134  }
135  }
136 
138  CUTLASS_DEVICE Element& operator[](int i) {
139  assert(i < kElements_);
140  return reinterpret_cast<Element*>(storage)[i];
141  }
142 
144  CUTLASS_DEVICE Element const& operator[](int i) const {
145  assert(i < kElements_);
146  return reinterpret_cast<Element const*>(storage)[i];
147  }
148 
149  private:
152 
154  static int const kStorageCount =
155  (sizeof(Element_) * kElements_ + sizeof(StorageType) - 1) / sizeof(StorageType);
157  StorageType storage[kStorageCount];
158 
160  static_assert(sizeof(StorageType) <= kAlignment_, "StorageType is too big for given alignment");
161 };
162 
164 
169 template <typename Fragment_, typename Iterations_, typename AccessType_>
174  typedef Fragment_ Fragment;
176  typedef Iterations_ Iterations;
178  typedef AccessType_ AccessType;
179 
181  typedef typename Fragment::Element Element;
183  static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element));
188 
190  template <typename OtherFragment_>
191  CUTLASS_DEVICE FragmentIterator(OtherFragment_& fragment, int offset = 0)
192  : pointer(reinterpret_cast<Element*>(&fragment[offset])) {
193  static_assert(OtherFragment_::kElements >= Fragment::kElements, "");
194  }
195 
197  CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const {
198  int const imm = ComputeOffsetFromStrides<Strides>::get(d, h, w, c);
199  return reinterpret_cast<AccessType const&>(pointer[imm]);
200  }
201 
203  CUTLASS_DEVICE AccessType& at(int d, int h, int w, int c = 0) {
204  int const imm = ComputeOffsetFromStrides<Strides>::get(d, h, w, c);
205  return reinterpret_cast<AccessType&>(pointer[imm]);
206  }
207 
209  CUTLASS_DEVICE AccessType const& operator[](int i) const {
210  return reinterpret_cast<AccessType const&>(pointer[i * kElementsPerAccess]);
211  }
212 
214  CUTLASS_DEVICE AccessType& operator[](int i) {
215  return reinterpret_cast<AccessType&>(pointer[i * kElementsPerAccess]);
216  }
217 
219  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; }
220 
223 };
224 
226 
227 template <typename Fragment_, typename Iterations_, typename AccessType_>
232  typedef Fragment_ Fragment;
234  typedef Iterations_ Iterations;
236  typedef AccessType_ AccessType;
237 
239  typedef typename Fragment::Element Element;
241  static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element));
246 
248  template <typename OtherFragment_>
249  CUTLASS_DEVICE FragmentConstIterator(OtherFragment_& fragment, int offset = 0)
250  : pointer(reinterpret_cast<Element const*>(&fragment[offset])) {
251  static_assert(OtherFragment_::kElements >= Fragment::kElements, "");
252  }
254  CUTLASS_DEVICE FragmentConstIterator(
256  : pointer(reinterpret_cast<Element const*>(rhs_.offset)) {}
257 
259  CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const {
260  int const imm = ComputeOffsetFromStrides<IterationsStrides>::get(d, h, w, c);
261  return reinterpret_cast<AccessType const&>(pointer[imm]);
262  }
263 
265  CUTLASS_DEVICE AccessType const& operator[](int i) const {
266  return reinterpret_cast<AccessType const&>(pointer[i * kElementsPerAccess]);
267  }
268 
270  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; }
271 
273  Element const* pointer;
274 };
275 
277 
278 } // namespace cutlass
CUTLASS_DEVICE void clear()
Clear a fragment.
Definition: fragment.h:113
+
Definition: convert.h:33
+
CUTLASS_DEVICE Element & operator[](int i)
The accessor.
Definition: fragment.h:138
+
CUTLASS_DEVICE AccessType & at(int d, int h, int w, int c=0)
The accessor.
Definition: fragment.h:203
+
Definition: vector.h:41
+
Definition: fragment.h:228
+
CUTLASS_DEVICE AccessType const & operator[](int i) const
The accessor.
Definition: fragment.h:265
+
Shape< Shape_::kH *Shape_::kW *Shape_::kC, Shape_::kW *Shape_::kC, Shape_::kC, 1 > Shape
Definition: shape.h:155
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
Fragment::Element Element
The element.
Definition: fragment.h:181
+
static int const kElementsPerAccess
The number of elements per access.
Definition: fragment.h:241
+
Fragment_ Fragment
The fragment.
Definition: fragment.h:174
+
Fragment_ Fragment
The fragment.
Definition: fragment.h:232
+
CUTLASS_DEVICE AccessType & operator[](int i)
The accessor.
Definition: fragment.h:214
+
Fragment::Element Element
The element.
Definition: fragment.h:239
+
ShapeStrides< FragmentShape >::Shape IterationsStrides
The linear strides for iterations.
Definition: fragment.h:245
+
CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
Is the iterator valid?
Definition: fragment.h:270
+
CUTLASS_DEVICE FragmentIterator(OtherFragment_ &fragment, int offset=0)
Ctor.
Definition: fragment.h:191
+
Fragment< Element_, kElements_ > This_
Make sure the alignment makes sense wrt the size of elements.
Definition: fragment.h:101
+
FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
This class.
Definition: fragment.h:172
+
ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
The shape of the fragment.
Definition: fragment.h:243
+
Math utilities.
+
Definition: fragment.h:76
+
uint32_t Type
Definition: fragment.h:81
+
uint8_t Type
Definition: fragment.h:89
+
static CUTLASS_DEVICE int get(int d, int h, int w, int c)
Definition: shape.h:211
+
Element * pointer
The pointer.
Definition: fragment.h:222
+
AccessType_ AccessType
The access type.
Definition: fragment.h:236
+
Definition: shape.h:118
+
ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
The shape of the fragment.
Definition: fragment.h:185
+
A template defining Fragment Iterator Concept.
Definition: fragment.h:170
+
static int const kElements
The number of elements.
Definition: fragment.h:110
+
CUTLASS_DEVICE Element const & operator[](int i) const
The accessor.
Definition: fragment.h:144
+
Iterations_ Iterations
The number of iterations.
Definition: fragment.h:234
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Iterations_ Iterations
The number of iterations.
Definition: fragment.h:176
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
CUTLASS_DEVICE AccessType const & at(int d, int h, int w, int c=0) const
The accessor.
Definition: fragment.h:259
+
Element_ Element
The element.
Definition: fragment.h:108
+
FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
This class.
Definition: fragment.h:230
+
CUTLASS_DEVICE AccessType const & operator[](int i) const
The accessor.
Definition: fragment.h:209
+
uint16_t Type
Definition: fragment.h:85
+
Defines a 1D vector of elements held in the registers of each thread.
+
CUTLASS_DEVICE FragmentConstIterator(FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)
Create from non-constant FragmentIterator.
Definition: fragment.h:254
+
static int const kElementsPerAccess
The number of elements per access.
Definition: fragment.h:183
+
ShapeStrides< FragmentShape >::Shape Strides
The linear strides for iterations.
Definition: fragment.h:187
+
Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
+
AccessType_ AccessType
The access type.
Definition: fragment.h:178
+
CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
Is the iterator valid?
Definition: fragment.h:219
+
uint64_t Type
Definition: fragment.h:77
+
Definition: cutlass_math.h:45
+
CUTLASS_DEVICE FragmentConstIterator(OtherFragment_ &fragment, int offset=0)
Ctor.
Definition: fragment.h:249
+
CUTLASS_DEVICE AccessType const & at(int d, int h, int w, int c=0) const
The accessor.
Definition: fragment.h:197
+
Element const * pointer
The pointer.
Definition: fragment.h:273
+
+ + + + diff --git a/docs/generated-html/fragment__load__store_8h.html b/docs/generated-html/fragment__load__store_8h.html new file mode 100644 index 00000000..1c92a684 --- /dev/null +++ b/docs/generated-html/fragment__load__store_8h.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: fragment_load_store.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
fragment_load_store.h File Reference
+
+ + + + + diff --git a/docs/generated-html/fragment__load__store_8h_source.html b/docs/generated-html/fragment__load__store_8h_source.html new file mode 100644 index 00000000..db877fbd --- /dev/null +++ b/docs/generated-html/fragment__load__store_8h_source.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: fragment_load_store.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
fragment_load_store.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/load_store.h>
31 #include <cutlass/vector.h>
32 
33 namespace cutlass {
34 
36 
37 template <IteratorFragment::Kind kIteratorFragment,
38  int kAccessSize,
39  typename Scalar_,
40  MemorySpace::Kind Memory_,
41  typename FragmentElement_,
42  int kStride>
43 struct FragmentLoad {};
44 
45 template <int kAccessSize,
46  typename Scalar_,
47  MemorySpace::Kind Memory_,
48  typename FragmentElement_,
49  int kStride>
50 struct FragmentLoad<IteratorFragment::kWmmaMatrix,
51  kAccessSize,
52  Scalar_,
53  Memory_,
54  FragmentElement_,
55  kStride> {
57  typedef FragmentElement_ AccessType;
58 
60  static CUTLASS_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) {
61  value.load(&pointer[offset], kStride);
62  }
63 };
64 
65 template <int kAccessSize,
66  typename Scalar_,
67  MemorySpace::Kind Memory_,
68  typename FragmentElement_,
69  int kStride>
71  kAccessSize,
72  Scalar_,
73  Memory_,
74  FragmentElement_,
75  kStride> {
78 
80  static CUTLASS_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) {
81  Load<Scalar_, kAccessSize, Memory_>::load(value, pointer, offset);
82  }
83 };
84 
85 template <IteratorFragment::Kind kIteratorFragment,
86  int kAccessSize,
87  typename Scalar_,
88  MemorySpace::Kind Memory_,
89  typename FragmentElement_,
90  int kStride>
91 struct FragmentStore {};
92 
93 template <int kAccessSize,
94  typename Scalar_,
95  MemorySpace::Kind Memory_,
96  typename FragmentElement_,
97  int kStride>
98 struct FragmentStore<IteratorFragment::kWmmaMatrix,
99  kAccessSize,
100  Scalar_,
101  Memory_,
102  FragmentElement_,
103  kStride> {
105  typedef FragmentElement_ AccessType;
106 
108  static CUTLASS_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) {
109  value.store(&pointer[offset], kStride);
110  }
111 };
112 
113 template <int kAccessSize,
114  typename Scalar_,
115  MemorySpace::Kind Memory_,
116  typename FragmentElement_,
117  int kStride>
119  kAccessSize,
120  Scalar_,
121  Memory_,
122  FragmentElement_,
123  kStride> {
126 
128  static CUTLASS_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) {
129  Store<Scalar_, kAccessSize, Memory_>::store(value, pointer, offset);
130  }
131 };
132 
134 
135 }
Definition: fragment_load_store.h:43
+
Vectorize< Scalar_, kAccessSize >::Type AccessType
The input type.
Definition: fragment_load_store.h:125
+
Definition: convert.h:33
+ +
Vectorize< Scalar_, kAccessSize >::Type AccessType
The output type.
Definition: fragment_load_store.h:77
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:59
+
static CUTLASS_DEVICE void store(AccessType const &value, Scalar_ *pointer, int offset)
The store function.
Definition: fragment_load_store.h:108
+
static CUTLASS_DEVICE void store(AccessType const &value, Scalar_ *pointer, int offset)
The store function.
Definition: fragment_load_store.h:128
+
Kind
Definition: load_store.h:40
+
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:136
+
Kind
Definition: tile_iterator.h:67
+
static CUTLASS_DEVICE void load(AccessType &value, Scalar_ const *pointer, int offset)
The load function.
Definition: fragment_load_store.h:80
+
Defines abstractions for efficiently loading and storing vectors to memory.
+
Definition: vector.h:61
+
Defines a 1D vector of elements held in the registers of each thread.
+ +
Definition: fragment_load_store.h:91
+
static CUTLASS_DEVICE void load(AccessType &value, Scalar_ const *pointer, int offset)
The load function.
Definition: fragment_load_store.h:60
+
Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix.
Definition: tile_iterator.h:66
+
+ + + + diff --git a/docs/generated-html/fragment__multiply__add_8h.html b/docs/generated-html/fragment__multiply__add_8h.html new file mode 100644 index 00000000..59a94dfd --- /dev/null +++ b/docs/generated-html/fragment__multiply__add_8h.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: fragment_multiply_add.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
fragment_multiply_add.h File Reference
+
+
+ +

Defines multiply-add operations on fragments within a thread. +More...

+
#include <cutlass/fragment.h>
+
+

Go to the source code of this file.

+ + + + + + +

+Classes

struct  cutlass::gemm::FragmentMultiplyAdd< Scalar_ >
 
struct  cutlass::gemm::FragmentMultiplyAdd< half >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/fragment__multiply__add_8h_source.html b/docs/generated-html/fragment__multiply__add_8h_source.html new file mode 100644 index 00000000..9b453fd9 --- /dev/null +++ b/docs/generated-html/fragment__multiply__add_8h_source.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: fragment_multiply_add.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
fragment_multiply_add.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/fragment.h>
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template <typename Scalar_>
42  typedef Scalar_ ScalarA;
44  typedef Scalar_ ScalarB;
46  typedef Scalar_ ScalarC;
47 
49  CUTLASS_DEVICE FragmentMultiplyAdd() {}
50 
52  template <typename Fragment_>
53  CUTLASS_DEVICE void multiply(Scalar_ a, Fragment_ const& b, Fragment_& d) {
54  for (int j = 0; j < Fragment_::kElements; ++j) {
55  d[j] = a * b[j];
56  }
57  }
58 
60  template <typename Fragment_>
61  CUTLASS_DEVICE void multiply_add(Scalar_ a,
62  Fragment_ const& b,
63  Fragment_ const& c,
64  Fragment_& d) {
65  for (int j = 0; j < Fragment_::kElements; ++j) {
66  d[j] = a * b[j] + c[j];
67  }
68  }
69 };
70 
72 
73 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
74 template <>
75 struct FragmentMultiplyAdd<half> {
79  typedef half ScalarA;
81  typedef half ScalarB;
83  typedef half ScalarC;
84 
86  CUTLASS_DEVICE FragmentMultiplyAdd() {}
87 
89  template <typename Fragment_>
90  CUTLASS_DEVICE void multiply(half a, Fragment_ const& b, Fragment_& d) {
91 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
92  // The input.
93  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
94  // The output.
95  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
96 
97  // Assemble a half2 from a.
98  __half2 const a_half2 = __half2half2(a);
99 
100  for (int i = 0; i < Fragment_::kElements / 2; ++i) {
101  d_half2[i] = __hmul2(a_half2, b_half2[i]);
102  }
103 #endif
104  }
105 
107  template <typename Fragment_>
108  CUTLASS_DEVICE void multiply_add(half a, Fragment_ const& b, Fragment_ const& c, Fragment_& d) {
109 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
110  // The inputs.
111  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
112  __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]);
113  // The output.
114  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
115 
116  // Assemble a half2 from a.
117  __half2 const a_half2 = __half2half2(a);
118 
119  for (int i = 0; i < Fragment_::kElements / 2; ++i) {
120  d_half2[i] = __hfma2(a_half2, b_half2[i], c_half2[i]);
121  }
122 #endif
123  }
124 };
125 
126 #endif
127 
129 
130 } // namespace gemm
131 } // namespace cutlass
Scalar_ ScalarB
The type for B.
Definition: fragment_multiply_add.h:44
+
Definition: convert.h:33
+
CUTLASS_DEVICE void multiply(Scalar_ a, Fragment_ const &b, Fragment_ &d)
Multiply : d = a*b.
Definition: fragment_multiply_add.h:53
+
half ScalarA
The type for A.
Definition: fragment_multiply_add.h:79
+
CUTLASS_DEVICE FragmentMultiplyAdd()
Ctor.
Definition: fragment_multiply_add.h:86
+
CUTLASS_DEVICE void multiply_add(Scalar_ a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
Multiply-add : d = a*b + c.
Definition: fragment_multiply_add.h:61
+
half ScalarC
The type for C and D.
Definition: fragment_multiply_add.h:83
+
CUTLASS_DEVICE void multiply_add(half a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
Multiply-add : d = a*b + c.
Definition: fragment_multiply_add.h:108
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Shape< 1, 1, 1, 1 > InstructionShape
The shape of the instruction.
Definition: fragment_multiply_add.h:40
+
Scalar_ ScalarC
The type for C and D.
Definition: fragment_multiply_add.h:46
+
Scalar_ ScalarA
The type for A.
Definition: fragment_multiply_add.h:42
+
CUTLASS_DEVICE FragmentMultiplyAdd()
Ctor.
Definition: fragment_multiply_add.h:49
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
CUTLASS_DEVICE void multiply(half a, Fragment_ const &b, Fragment_ &d)
Multiply : d = a*b.
Definition: fragment_multiply_add.h:90
+
Shape< 1, 1, 1, 1 > InstructionShape
The shape of the instruction.
Definition: fragment_multiply_add.h:77
+
half ScalarB
The type for B.
Definition: fragment_multiply_add.h:81
+
Definition: fragment_multiply_add.h:38
+
+ + + + diff --git a/docs/generated-html/fragment__stream_8h.html b/docs/generated-html/fragment__stream_8h.html new file mode 100644 index 00000000..7c8ab1e5 --- /dev/null +++ b/docs/generated-html/fragment__stream_8h.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: fragment_stream.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
fragment_stream.h File Reference
+
+
+ +

An abstraction for implementing a stream loading a tile and storing a tile using a pair of tile iterators. +More...

+ +

Go to the source code of this file.

+ + + + + + + + +

+Classes

struct  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
 Manages a pair of iterators to stream data from global memory to shared. More...
 
struct  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params
 Parameters passed to initialize the iterator. More...
 
+ + + +

+Namespaces

 cutlass
 
+
+ + + + diff --git a/docs/generated-html/fragment__stream_8h_source.html b/docs/generated-html/fragment__stream_8h_source.html new file mode 100644 index 00000000..0810ad05 --- /dev/null +++ b/docs/generated-html/fragment__stream_8h_source.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: fragment_stream.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
fragment_stream.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/convert.h>
32 #include <cutlass/fragment.h>
34 #include <cutlass/tensor_ref.h>
35 #include <cutlass/tile_iterator.h>
36 
39 #include <cutlass/matrix_traits.h>
40 
41 namespace cutlass {
42 
44 
46 template <typename Traits_, typename LoadIterator_, typename StoreIterator_,
47  typename Convert_ =
48  FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>,
49  typename Index_ = int>
51  //
52  // Constant and type definitions
53  //
54 
56  typedef Traits_ Traits;
57 
59  typedef LoadIterator_ LoadIterator;
60 
62  typedef StoreIterator_ StoreIterator;
63 
65  typedef Convert_ Convert;
66 
68  typedef Index_ Index;
69 
70  //
71  // Dependent types
72  //
73 
75  typedef typename LoadIterator::Fragment Fragment;
76 
79 
81  typedef typename StoreIterator::Storage Storage;
82 
85 
86  //
87  // Nested classes
88  //
89 
91  struct Params {
93  typedef typename LoadIterator::Params LoadParams;
94 
96  typedef typename StoreIterator::Params StoreParams;
97 
98  //
99  // Data members
100  //
101 
104 
107 
108  //
109  // Methods
110  //
111 
113  CUTLASS_HOST_DEVICE int initialize(LoadParams const &_load_params,
114  StoreParams const &_store_params) {
115  load_params = _load_params;
116  store_params = _store_params;
117  return 0;
118  }
119  };
120 
121  //
122  // Data members
123  //
124 
127 
129  typename LoadIterator::PredicateVector predicates;
130 
133 
136 
139 
140  //
141  // Static members
142  //
143 
145  static CUTLASS_DEVICE void shared_store_fence() { __syncthreads(); }
146 
147  //
148  // Methods
149  //
150 
151  CUTLASS_DEVICE
153 
155  CUTLASS_DEVICE
156  FragmentStream(Params const &params, Coord<3> const &bounds,
157  Coord<3> const &block_offset = make_Coord(0, 0, 0))
158  : load_iterator(params.load_params, block_offset), store_iterator(params.store_params) {
159  // set predicates
160  initialize_predicates(bounds, block_offset);
161 
162  fetch.clear();
163  }
164 
166  CUTLASS_DEVICE
167  void load() {
168  ConstPredicateTileAdapter<typename LoadIterator::PredicateVector,
169  typename LoadIterator::Iterations>
170  predicates_it(predicates);
172  }
173 
175  CUTLASS_DEVICE
176  void commit() {
177  StoreFragment store_fragment(convert(fetch));
178 
179  iterator_store(store_iterator, store_fragment);
180  }
181 
183  CUTLASS_DEVICE
184  void initialize_predicates(Coord<3> const &bounds, Coord<3> const &block_offset) {
186  predicates_it(predicates);
187  load_iterator.initialize_predicates(predicates_it, bounds, block_offset);
188  }
189 };
190 
192 
193 } // namespace cutlass
StoreIterator store_iterator
Stores fragment to shared memory.
Definition: fragment_stream.h:132
+ +
Definition: convert.h:34
+
Defines a structure containing strides, bounds, and a pointer to tensor data.
+
StoreParams store_params
Parameters to the store iterator.
Definition: fragment_stream.h:106
+
LoadIterator::PredicateVector predicates
Predicate vector.
Definition: fragment_stream.h:129
+
Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
+
Defines structural properties of complete GEMM computation.
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
StoreIterator::Storage Storage
Destination storage.
Definition: fragment_stream.h:81
+
CUTLASS_DEVICE FragmentStream(Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
Constructor.
Definition: fragment_stream.h:156
+
Fragment fetch
Fragment fetched by load iterator.
Definition: fragment_stream.h:135
+
CUTLASS_DEVICE void commit()
Commits the fragment.
Definition: fragment_stream.h:176
+
StoreIterator_ StoreIterator
Defines the store iterator.
Definition: fragment_stream.h:62
+
Adapter to enable random access to predicates via logical coordinate within a tile.
Definition: predicate_vector.h:435
+
Index_ Index
Index type.
Definition: fragment_stream.h:68
+
Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
The storage.
Definition: tile_iterator.h:181
+ +
static CUTLASS_DEVICE void shared_store_fence()
The memory fence for shared stores.
Definition: fragment_stream.h:145
+
Free functions for loading and storing to implementations of tile iterator concepts.
+ +
CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
Loads a fragment from an input iterator, masked by a predicate iterator.
Definition: iterator_access.h:113
+
StoreIterator::Storage SharedStoreStorage
The storage in shared memory.
Definition: fragment_stream.h:84
+
CUTLASS_DEVICE void initialize_predicates(Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
Recomputes predicates.
Definition: fragment_stream.h:184
+
Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
+
Manages a pair of iterators to stream data from global memory to shared.
Definition: fragment_stream.h:50
+
CUTLASS_DEVICE FragmentStream()
Definition: fragment_stream.h:152
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:37
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment)
Stores a fragment to an output iterator.
Definition: iterator_access.h:174
+
LoadIterator::Params LoadParams
Load parameters.
Definition: fragment_stream.h:93
+
Convert_ Convert
Converts between tiles.
Definition: fragment_stream.h:65
+
StoreIterator::Fragment StoreFragment
Stored fragment type.
Definition: fragment_stream.h:78
+
Traits_ Traits
Defines traits of WMMA GEMM tile stream.
Definition: fragment_stream.h:56
+
CUTLASS_DEVICE void load()
Loads the fragment.
Definition: fragment_stream.h:167
+
StoreIterator::Params StoreParams
Store parameters.
Definition: fragment_stream.h:96
+
Adapter to enable random access to predicates via logical coordinate within a tile.
Definition: predicate_vector.h:466
+
LoadIterator::Fragment Fragment
Loaded fragment type.
Definition: fragment_stream.h:75
+
LoadIterator_ LoadIterator
Defines the load iterator.
Definition: fragment_stream.h:59
+
CUTLASS_HOST_DEVICE int initialize(LoadParams const &_load_params, StoreParams const &_store_params)
Initializes parameters.
Definition: fragment_stream.h:113
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
LoadParams load_params
Parameters to load iterator.
Definition: fragment_stream.h:103
+
Convert convert
Converts between load fragments and store fragments.
Definition: fragment_stream.h:138
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
LoadIterator load_iterator
Loads fragment from global memory.
Definition: fragment_stream.h:126
+
Defines conversion operations among Fragments of different base type.
+ +
Parameters passed to initialize the iterator.
Definition: fragment_stream.h:91
+
+ + + + diff --git a/docs/generated-html/functions.html b/docs/generated-html/functions.html new file mode 100644 index 00000000..e6b156fb --- /dev/null +++ b/docs/generated-html/functions.html @@ -0,0 +1,149 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- a -

+
+ + + + diff --git a/docs/generated-html/functions_0x7e.html b/docs/generated-html/functions_0x7e.html new file mode 100644 index 00000000..41aa664c --- /dev/null +++ b/docs/generated-html/functions_0x7e.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- ~ -

+
+ + + + diff --git a/docs/generated-html/functions_b.html b/docs/generated-html/functions_b.html new file mode 100644 index 00000000..79038aa1 --- /dev/null +++ b/docs/generated-html/functions_b.html @@ -0,0 +1,122 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- b -

+
+ + + + diff --git a/docs/generated-html/functions_c.html b/docs/generated-html/functions_c.html new file mode 100644 index 00000000..9da7dd06 --- /dev/null +++ b/docs/generated-html/functions_c.html @@ -0,0 +1,154 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- c -

+
+ + + + diff --git a/docs/generated-html/functions_d.html b/docs/generated-html/functions_d.html new file mode 100644 index 00000000..ca73b919 --- /dev/null +++ b/docs/generated-html/functions_d.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- d -

+
+ + + + diff --git a/docs/generated-html/functions_e.html b/docs/generated-html/functions_e.html new file mode 100644 index 00000000..ee616a11 --- /dev/null +++ b/docs/generated-html/functions_e.html @@ -0,0 +1,114 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- e -

+
+ + + + diff --git a/docs/generated-html/functions_enum.html b/docs/generated-html/functions_enum.html new file mode 100644 index 00000000..b710de0f --- /dev/null +++ b/docs/generated-html/functions_enum.html @@ -0,0 +1,89 @@ + + + + + + + +Cutlass: Class Members - Enumerations + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_eval.html b/docs/generated-html/functions_eval.html new file mode 100644 index 00000000..40c01ec8 --- /dev/null +++ b/docs/generated-html/functions_eval.html @@ -0,0 +1,172 @@ + + + + + + + +Cutlass: Class Members - Enumerator + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- a -

+ + +

- k -

+ + +

- m -

+ + +

- v -

+
+ + + + diff --git a/docs/generated-html/functions_f.html b/docs/generated-html/functions_f.html new file mode 100644 index 00000000..e2a60d90 --- /dev/null +++ b/docs/generated-html/functions_f.html @@ -0,0 +1,168 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- f -

+
+ + + + diff --git a/docs/generated-html/functions_func.html b/docs/generated-html/functions_func.html new file mode 100644 index 00000000..0a425c0a --- /dev/null +++ b/docs/generated-html/functions_func.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_0x7e.html b/docs/generated-html/functions_func_0x7e.html new file mode 100644 index 00000000..bf8de83a --- /dev/null +++ b/docs/generated-html/functions_func_0x7e.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- ~ -

+
+ + + + diff --git a/docs/generated-html/functions_func_b.html b/docs/generated-html/functions_func_b.html new file mode 100644 index 00000000..5d533d80 --- /dev/null +++ b/docs/generated-html/functions_func_b.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_c.html b/docs/generated-html/functions_func_c.html new file mode 100644 index 00000000..19541d87 --- /dev/null +++ b/docs/generated-html/functions_func_c.html @@ -0,0 +1,141 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- c -

+
+ + + + diff --git a/docs/generated-html/functions_func_d.html b/docs/generated-html/functions_func_d.html new file mode 100644 index 00000000..4c1c062a --- /dev/null +++ b/docs/generated-html/functions_func_d.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_e.html b/docs/generated-html/functions_func_e.html new file mode 100644 index 00000000..89f2b82a --- /dev/null +++ b/docs/generated-html/functions_func_e.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_f.html b/docs/generated-html/functions_func_f.html new file mode 100644 index 00000000..a614ede1 --- /dev/null +++ b/docs/generated-html/functions_func_f.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_g.html b/docs/generated-html/functions_func_g.html new file mode 100644 index 00000000..b3023736 --- /dev/null +++ b/docs/generated-html/functions_func_g.html @@ -0,0 +1,122 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- g -

+
+ + + + diff --git a/docs/generated-html/functions_func_h.html b/docs/generated-html/functions_func_h.html new file mode 100644 index 00000000..7eb85aac --- /dev/null +++ b/docs/generated-html/functions_func_h.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- h -

+
+ + + + diff --git a/docs/generated-html/functions_func_i.html b/docs/generated-html/functions_func_i.html new file mode 100644 index 00000000..16cfdc51 --- /dev/null +++ b/docs/generated-html/functions_func_i.html @@ -0,0 +1,163 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- i -

+
+ + + + diff --git a/docs/generated-html/functions_func_l.html b/docs/generated-html/functions_func_l.html new file mode 100644 index 00000000..c76f9fc5 --- /dev/null +++ b/docs/generated-html/functions_func_l.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_m.html b/docs/generated-html/functions_func_m.html new file mode 100644 index 00000000..2c68ec4f --- /dev/null +++ b/docs/generated-html/functions_func_m.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_o.html b/docs/generated-html/functions_func_o.html new file mode 100644 index 00000000..fb7b39f7 --- /dev/null +++ b/docs/generated-html/functions_func_o.html @@ -0,0 +1,193 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- o -

+
+ + + + diff --git a/docs/generated-html/functions_func_p.html b/docs/generated-html/functions_func_p.html new file mode 100644 index 00000000..8f1b5e8a --- /dev/null +++ b/docs/generated-html/functions_func_p.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_r.html b/docs/generated-html/functions_func_r.html new file mode 100644 index 00000000..96a33533 --- /dev/null +++ b/docs/generated-html/functions_func_r.html @@ -0,0 +1,99 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_s.html b/docs/generated-html/functions_func_s.html new file mode 100644 index 00000000..197958af --- /dev/null +++ b/docs/generated-html/functions_func_s.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- s -

+
+ + + + diff --git a/docs/generated-html/functions_func_t.html b/docs/generated-html/functions_func_t.html new file mode 100644 index 00000000..13b5b7ab --- /dev/null +++ b/docs/generated-html/functions_func_t.html @@ -0,0 +1,114 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_u.html b/docs/generated-html/functions_func_u.html new file mode 100644 index 00000000..2ca7d7a1 --- /dev/null +++ b/docs/generated-html/functions_func_u.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- u -

+
+ + + + diff --git a/docs/generated-html/functions_func_v.html b/docs/generated-html/functions_func_v.html new file mode 100644 index 00000000..ee4e6108 --- /dev/null +++ b/docs/generated-html/functions_func_v.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_w.html b/docs/generated-html/functions_func_w.html new file mode 100644 index 00000000..ef637faa --- /dev/null +++ b/docs/generated-html/functions_func_w.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- w -

+
+ + + + diff --git a/docs/generated-html/functions_g.html b/docs/generated-html/functions_g.html new file mode 100644 index 00000000..9493c5ce --- /dev/null +++ b/docs/generated-html/functions_g.html @@ -0,0 +1,231 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- g -

+
+ + + + diff --git a/docs/generated-html/functions_h.html b/docs/generated-html/functions_h.html new file mode 100644 index 00000000..4c7693db --- /dev/null +++ b/docs/generated-html/functions_h.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- h -

+
+ + + + diff --git a/docs/generated-html/functions_i.html b/docs/generated-html/functions_i.html new file mode 100644 index 00000000..a91cf4c4 --- /dev/null +++ b/docs/generated-html/functions_i.html @@ -0,0 +1,268 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- i -

+
+ + + + diff --git a/docs/generated-html/functions_k.html b/docs/generated-html/functions_k.html new file mode 100644 index 00000000..1aab81d8 --- /dev/null +++ b/docs/generated-html/functions_k.html @@ -0,0 +1,376 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- k -

+
+ + + + diff --git a/docs/generated-html/functions_l.html b/docs/generated-html/functions_l.html new file mode 100644 index 00000000..441d9d32 --- /dev/null +++ b/docs/generated-html/functions_l.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- l -

+
+ + + + diff --git a/docs/generated-html/functions_m.html b/docs/generated-html/functions_m.html new file mode 100644 index 00000000..1a9fe809 --- /dev/null +++ b/docs/generated-html/functions_m.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- m -

+
+ + + + diff --git a/docs/generated-html/functions_n.html b/docs/generated-html/functions_n.html new file mode 100644 index 00000000..bff5fbb3 --- /dev/null +++ b/docs/generated-html/functions_n.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_o.html b/docs/generated-html/functions_o.html new file mode 100644 index 00000000..b79e0bd1 --- /dev/null +++ b/docs/generated-html/functions_o.html @@ -0,0 +1,213 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- o -

+
+ + + + diff --git a/docs/generated-html/functions_p.html b/docs/generated-html/functions_p.html new file mode 100644 index 00000000..9131d011 --- /dev/null +++ b/docs/generated-html/functions_p.html @@ -0,0 +1,164 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- p -

+
+ + + + diff --git a/docs/generated-html/functions_r.html b/docs/generated-html/functions_r.html new file mode 100644 index 00000000..79d3a2e9 --- /dev/null +++ b/docs/generated-html/functions_r.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_s.html b/docs/generated-html/functions_s.html new file mode 100644 index 00000000..0f0af76a --- /dev/null +++ b/docs/generated-html/functions_s.html @@ -0,0 +1,383 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- s -

+
+ + + + diff --git a/docs/generated-html/functions_t.html b/docs/generated-html/functions_t.html new file mode 100644 index 00000000..1b83a091 --- /dev/null +++ b/docs/generated-html/functions_t.html @@ -0,0 +1,280 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- t -

+
+ + + + diff --git a/docs/generated-html/functions_type.html b/docs/generated-html/functions_type.html new file mode 100644 index 00000000..2241f00f --- /dev/null +++ b/docs/generated-html/functions_type.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- a -

+
+ + + + diff --git a/docs/generated-html/functions_type_b.html b/docs/generated-html/functions_type_b.html new file mode 100644 index 00000000..e092b1a8 --- /dev/null +++ b/docs/generated-html/functions_type_b.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- b -

+
+ + + + diff --git a/docs/generated-html/functions_type_c.html b/docs/generated-html/functions_type_c.html new file mode 100644 index 00000000..e797657a --- /dev/null +++ b/docs/generated-html/functions_type_c.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_d.html b/docs/generated-html/functions_type_d.html new file mode 100644 index 00000000..827859c2 --- /dev/null +++ b/docs/generated-html/functions_type_d.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- d -

+
+ + + + diff --git a/docs/generated-html/functions_type_e.html b/docs/generated-html/functions_type_e.html new file mode 100644 index 00000000..cac932d0 --- /dev/null +++ b/docs/generated-html/functions_type_e.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_f.html b/docs/generated-html/functions_type_f.html new file mode 100644 index 00000000..a71defeb --- /dev/null +++ b/docs/generated-html/functions_type_f.html @@ -0,0 +1,141 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- f -

+
+ + + + diff --git a/docs/generated-html/functions_type_g.html b/docs/generated-html/functions_type_g.html new file mode 100644 index 00000000..4ae366f3 --- /dev/null +++ b/docs/generated-html/functions_type_g.html @@ -0,0 +1,183 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- g -

+
+ + + + diff --git a/docs/generated-html/functions_type_i.html b/docs/generated-html/functions_type_i.html new file mode 100644 index 00000000..6be0ee90 --- /dev/null +++ b/docs/generated-html/functions_type_i.html @@ -0,0 +1,160 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- i -

+
+ + + + diff --git a/docs/generated-html/functions_type_l.html b/docs/generated-html/functions_type_l.html new file mode 100644 index 00000000..2e7334f0 --- /dev/null +++ b/docs/generated-html/functions_type_l.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_m.html b/docs/generated-html/functions_type_m.html new file mode 100644 index 00000000..043340a5 --- /dev/null +++ b/docs/generated-html/functions_type_m.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_n.html b/docs/generated-html/functions_type_n.html new file mode 100644 index 00000000..bb5ad36c --- /dev/null +++ b/docs/generated-html/functions_type_n.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_o.html b/docs/generated-html/functions_type_o.html new file mode 100644 index 00000000..42ed2813 --- /dev/null +++ b/docs/generated-html/functions_type_o.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- o -

+
+ + + + diff --git a/docs/generated-html/functions_type_p.html b/docs/generated-html/functions_type_p.html new file mode 100644 index 00000000..4e124be9 --- /dev/null +++ b/docs/generated-html/functions_type_p.html @@ -0,0 +1,112 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- p -

+
+ + + + diff --git a/docs/generated-html/functions_type_s.html b/docs/generated-html/functions_type_s.html new file mode 100644 index 00000000..2d67bf44 --- /dev/null +++ b/docs/generated-html/functions_type_s.html @@ -0,0 +1,278 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- s -

+
+ + + + diff --git a/docs/generated-html/functions_type_t.html b/docs/generated-html/functions_type_t.html new file mode 100644 index 00000000..54dffcad --- /dev/null +++ b/docs/generated-html/functions_type_t.html @@ -0,0 +1,227 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- t -

+
+ + + + diff --git a/docs/generated-html/functions_type_v.html b/docs/generated-html/functions_type_v.html new file mode 100644 index 00000000..60177eee --- /dev/null +++ b/docs/generated-html/functions_type_v.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_w.html b/docs/generated-html/functions_type_w.html new file mode 100644 index 00000000..ba510b56 --- /dev/null +++ b/docs/generated-html/functions_type_w.html @@ -0,0 +1,90 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_y.html b/docs/generated-html/functions_type_y.html new file mode 100644 index 00000000..e20abf82 --- /dev/null +++ b/docs/generated-html/functions_type_y.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_u.html b/docs/generated-html/functions_u.html new file mode 100644 index 00000000..fc2ce25c --- /dev/null +++ b/docs/generated-html/functions_u.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- u -

+
+ + + + diff --git a/docs/generated-html/functions_v.html b/docs/generated-html/functions_v.html new file mode 100644 index 00000000..ebea1a15 --- /dev/null +++ b/docs/generated-html/functions_v.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- v -

+
+ + + + diff --git a/docs/generated-html/functions_vars.html b/docs/generated-html/functions_vars.html new file mode 100644 index 00000000..43affc31 --- /dev/null +++ b/docs/generated-html/functions_vars.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_b.html b/docs/generated-html/functions_vars_b.html new file mode 100644 index 00000000..4b145d14 --- /dev/null +++ b/docs/generated-html/functions_vars_b.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_c.html b/docs/generated-html/functions_vars_c.html new file mode 100644 index 00000000..5e5a2251 --- /dev/null +++ b/docs/generated-html/functions_vars_c.html @@ -0,0 +1,89 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_d.html b/docs/generated-html/functions_vars_d.html new file mode 100644 index 00000000..df6d2e3e --- /dev/null +++ b/docs/generated-html/functions_vars_d.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_e.html b/docs/generated-html/functions_vars_e.html new file mode 100644 index 00000000..f98d4a1b --- /dev/null +++ b/docs/generated-html/functions_vars_e.html @@ -0,0 +1,87 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_f.html b/docs/generated-html/functions_vars_f.html new file mode 100644 index 00000000..71f57760 --- /dev/null +++ b/docs/generated-html/functions_vars_f.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_g.html b/docs/generated-html/functions_vars_g.html new file mode 100644 index 00000000..712c6b5d --- /dev/null +++ b/docs/generated-html/functions_vars_g.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_i.html b/docs/generated-html/functions_vars_i.html new file mode 100644 index 00000000..e0bfcd78 --- /dev/null +++ b/docs/generated-html/functions_vars_i.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- i -

+
+ + + + diff --git a/docs/generated-html/functions_vars_k.html b/docs/generated-html/functions_vars_k.html new file mode 100644 index 00000000..04e3c5d9 --- /dev/null +++ b/docs/generated-html/functions_vars_k.html @@ -0,0 +1,327 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- k -

+
+ + + + diff --git a/docs/generated-html/functions_vars_l.html b/docs/generated-html/functions_vars_l.html new file mode 100644 index 00000000..78a99ee3 --- /dev/null +++ b/docs/generated-html/functions_vars_l.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_m.html b/docs/generated-html/functions_vars_m.html new file mode 100644 index 00000000..d97d3377 --- /dev/null +++ b/docs/generated-html/functions_vars_m.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_n.html b/docs/generated-html/functions_vars_n.html new file mode 100644 index 00000000..60139fe6 --- /dev/null +++ b/docs/generated-html/functions_vars_n.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_p.html b/docs/generated-html/functions_vars_p.html new file mode 100644 index 00000000..41f8498f --- /dev/null +++ b/docs/generated-html/functions_vars_p.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- p -

+
+ + + + diff --git a/docs/generated-html/functions_vars_r.html b/docs/generated-html/functions_vars_r.html new file mode 100644 index 00000000..034ca3db --- /dev/null +++ b/docs/generated-html/functions_vars_r.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_s.html b/docs/generated-html/functions_vars_s.html new file mode 100644 index 00000000..52934eee --- /dev/null +++ b/docs/generated-html/functions_vars_s.html @@ -0,0 +1,144 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- s -

+
+ + + + diff --git a/docs/generated-html/functions_vars_t.html b/docs/generated-html/functions_vars_t.html new file mode 100644 index 00000000..4fdfb303 --- /dev/null +++ b/docs/generated-html/functions_vars_t.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_v.html b/docs/generated-html/functions_vars_v.html new file mode 100644 index 00000000..f773cd01 --- /dev/null +++ b/docs/generated-html/functions_vars_v.html @@ -0,0 +1,90 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_w.html b/docs/generated-html/functions_w.html new file mode 100644 index 00000000..9ef19c96 --- /dev/null +++ b/docs/generated-html/functions_w.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_y.html b/docs/generated-html/functions_y.html new file mode 100644 index 00000000..a97be07e --- /dev/null +++ b/docs/generated-html/functions_y.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- y -

+
+ + + + diff --git a/docs/generated-html/gemm_8h.html b/docs/generated-html/gemm_8h.html new file mode 100644 index 00000000..9996508b --- /dev/null +++ b/docs/generated-html/gemm_8h.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: gemm.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm.h File Reference
+
+
+ +

Implements a software-pipelined efficient GEMM. +More...

+
#include <cuda.h>
+#include <cutlass/coord.h>
+#include <cutlass/util/platform.h>
+
+

Go to the source code of this file.

+ + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmDesc< Scalar_, Index_ >
 
struct  cutlass::gemm::Gemm< GemmTraits_ >
 
struct  cutlass::gemm::Gemm< GemmTraits_ >::Params
 The params. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+ + + + +

+Functions

template<typename Gemm_ >
__global__ void cutlass::gemm::gemm_kernel (typename Gemm_::Params params)
 
+
+ + + + diff --git a/docs/generated-html/gemm_8h_source.html b/docs/generated-html/gemm_8h_source.html new file mode 100644 index 00000000..216e9b5a --- /dev/null +++ b/docs/generated-html/gemm_8h_source.html @@ -0,0 +1,130 @@ + + + + + + + +Cutlass: gemm.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #if !defined(__CUDACC_RTC__)
31 #include <cuda.h>
32 #endif
33 
34 #include <cutlass/coord.h>
35 #include <cutlass/util/platform.h>
36 
37 namespace cutlass {
38 namespace gemm {
39 
41 
42 template <typename Gemm_>
43 __global__ void gemm_kernel(typename Gemm_::Params params) {
44  // Declare shared memory.
45  __shared__ typename Gemm_::SharedStorage shared_storage;
46 
47  // Construct the GEMM object.
48  Gemm_ gemm(params, shared_storage);
49  // Run GEMM.
50  gemm.multiply_add();
51 }
52 
54 
55 template <typename Scalar_, typename Index_ = int>
56 struct GemmDesc {
58  Index_ m, n, k;
60  Scalar_ alpha, beta;
62  void const* d_a;
64  Index_ lda;
66  void const* d_b;
68  Index_ ldb;
70  void const* d_c;
72  Index_ ldc;
74  void* d_d;
76  Index_ ldd;
77 };
78 
80 
81 template <typename GemmTraits_>
82 struct Gemm {
86  typedef GemmTraits_ Traits;
88  typedef typename Traits::SharedStorage SharedStorage;
89 
91  typedef typename Traits::ScalarA ScalarA;
93  typedef typename Traits::ScalarB ScalarB;
95  typedef typename Traits::Epilogue::Scalar ScalarEpilogue;
97  typedef typename Traits::Epilogue::ScalarC ScalarC;
99  typedef typename Traits::Epilogue::ScalarD ScalarD;
101  typedef typename Traits::Index Index;
102 
104  static int const kThreads = Traits::GemmConfig::kThreads;
105 
107  struct Params : public Traits::Params {
109  Index n,
110  Index k,
111  ScalarEpilogue alpha,
112  ScalarA const* d_a,
113  Index lda,
114  ScalarB const* d_b,
115  Index ldb,
116  ScalarEpilogue beta,
117  ScalarC const* d_c,
118  Index ldc,
119  ScalarD* d_d,
120  Index ldd) {
122  desc.m = m;
123  desc.n = n;
124  desc.k = k;
125  desc.alpha = alpha;
126  desc.beta = beta;
127  desc.d_a = reinterpret_cast<void const*>(d_a);
128  desc.lda = lda;
129  desc.d_b = reinterpret_cast<void const*>(d_b);
130  desc.ldb = ldb;
131  desc.d_c = reinterpret_cast<void const*>(d_c);
132  desc.ldc = ldc;
133  desc.d_d = reinterpret_cast<void*>(d_d);
134  desc.ldd = ldd;
135  return Traits::Params::initialize(desc);
136  }
137  };
138 
139 #if !defined(__CUDACC_RTC__)
140  static __host__ cudaError_t launch(Params const& params,
142  cudaStream_t stream = cudaStreamDefault) {
143  // Setup the grid.
144  dim3 grid;
145  grid.x = (params.m + Traits::OutputTile::kW - 1) / Traits::OutputTile::kW;
146  grid.y = (params.n + Traits::OutputTile::kH - 1) / Traits::OutputTile::kH;
147 
148  // The number of threads.
149  dim3 block;
150  block.x = kThreads;
151 
152  // Launch the kernel.
153  void const* params_ = reinterpret_cast<void const*>(&params);
154 
155  return cudaLaunchKernel(reinterpret_cast<void*>(&gemm_kernel<This_>),
156  grid,
157  block,
158  const_cast<void**>(&params_),
159  0,
160  stream);
161  }
162 
164  static __host__ cudaError_t launch(CUfunction kernel,
165  Params const& params,
166  CUstream stream = CU_STREAM_LEGACY) {
167  // Setup the grid.
168  dim3 grid;
169  grid.x = (params.m + Traits::OutputTile::kW - 1) / Traits::OutputTile::kW;
170  grid.y = (params.n + Traits::OutputTile::kH - 1) / Traits::OutputTile::kH;
171 
172  // The number of threads.
173  dim3 block;
174  block.x = kThreads;
175 
176  // Launch the kernel.
177  void* params_[] = {const_cast<void*>(reinterpret_cast<void const*>(&params))};
178 
179  // return cudaLaunchKernel(reinterpret_cast<void*>(&gemm_kernel<This_>), grid, block,
180  // const_cast<void**>(&params_), 0, stream);
181  CUresult result = cuLaunchKernel(
182  kernel, grid.x, grid.y, grid.z, block.x, block.y, block.z, 0, stream, params_, 0);
183 
184  if (result != CUDA_SUCCESS) {
185  return cudaErrorLaunchFailure;
186  }
187  return cudaSuccess;
188  }
189 
190 #endif
191 
193  CUTLASS_DEVICE Gemm(Params const& params_, SharedStorage& shared_storage_)
194  : params(params_), shared_storage(shared_storage_) {}
195 
197  CUTLASS_DEVICE void multiply_add() {
198  // Swizzle the IDs of the block (to enable better cache behavior).
199  typename Traits::BlockSwizzle block_swizzle;
200  dim3 block = block_swizzle.swizzle();
201 
202  // Scale the id.
203  block.x *= Traits::OutputTile::kW;
204  block.y *= Traits::OutputTile::kH;
205 
206  // We may want to use shared memory to clear the registers.
207  typedef typename Traits::ClearAccumulators ClearAccumulators;
208 
209  // The streams to read A/B from global memory to shared memory.
210  typename Traits::GlobalLoadStream global_stream(params, shared_storage, block);
211 
212  // Create the accumulator clear.
213  ClearAccumulators clear(shared_storage.main_loop.clear);
214 
216  typedef typename Traits::MultiplyAdd MultiplyAdd;
217 
218  // By how much we unroll the main loop.
219  Index const kUnroll = static_cast<Index>(MultiplyAdd::AccumulatorsPerWarp::kD);
220 
221  // If we do not have enough steps in the main loop, trigger the residue code.
222  if (params.k < kUnroll) {
223  global_stream.residue(params.k, true);
224  }
225 
226  // Fetch the fragments for A and B from global memory.
227  global_stream.copy();
228 
229  // Copy the elements to shared memory (after transformation if needed).
230  global_stream.commit();
231 
232  // Make sure the data is in shared memory.
233  Traits::shared_store_fence(false);
234 
235  // The unrolling steps for the main loop.
236  int const kUnrollingSteps =
237  MultiplyAdd::AccumulatorsPerWarp::kD / MultiplyAdd::InstructionShape::kD;
238 
239  // Make sure we have at least 2 unrolling steps or our pipelining is not going to work.
240  static_assert(kUnrollingSteps >= 2, "The pipelining assumes at least two steps");
241 
242  // The stream of data from shared memory to fragments.
243  typename Traits::SharedLoadStream shared_load_stream(params, shared_storage);
244 
245  // Trigger the copy from shared memory for the 1st stream.
246  shared_load_stream.copy(0);
247 
248  // Allocate the accumulators.
249  typename MultiplyAdd::Accumulators accumulators;
250  // Clear the accumulators.
251  clear.clear(accumulators);
252 
253  // Enter the main loop and iterate.
254  typedef typename Traits::Index Index;
255  for (Index outer_k = params.k - kUnroll; outer_k > -kUnroll; outer_k -= kUnroll) {
256  // If that's the last "load iteration" update the predicates.
257  int const is_residue = outer_k <= kUnroll;
258  if (is_residue) {
259  global_stream.residue(outer_k);
260  }
261 
262  // Load data for the next iteration of the main loop.
263  global_stream.copy();
264 
266  for (int step = 0; step < kUnrollingSteps - 1; ++step) {
267  // Trigger the copy from shared memory for the next A/B values.
268  shared_load_stream.copy(step + 1);
269  // Make sure the values are available for the current iteration to do the multiply-add.
270  shared_load_stream.commit(step);
271 
272  // Do the math on the fragments of the current iteration.
273  MultiplyAdd multiply_add;
274  multiply_add.multiply_add(shared_load_stream.fragment_a(step),
275  shared_load_stream.fragment_b(step),
276  accumulators,
277  accumulators);
278  }
279 
280  // Make sure the data from shared memory has been entirely consumed.
281  Traits::shared_load_fence(true);
282 
283  // Commit the data in shared memory for A/B.
284  global_stream.commit();
285 
286  // Make sure the data is in shared memory.
287  Traits::shared_store_fence(true);
288 
289  // Move to the next stage for the load (if it makes sense).
290  shared_load_stream.inc_stage();
291  // Trigger the copy from shared memory for the next loop iteration.
292  shared_load_stream.copy(0);
293  // Make sure the values are available for the current iteration to do the multiply-add.
294  shared_load_stream.commit(kUnrollingSteps - 1);
295 
296  // Do the math on the fragments of the current iteration.
297  MultiplyAdd multiply_add;
298  multiply_add.multiply_add(shared_load_stream.fragment_a(kUnrollingSteps - 1),
299  shared_load_stream.fragment_b(kUnrollingSteps - 1),
300  accumulators,
301  accumulators);
302  }
303 
304  // Epilogue.
305  typedef typename Traits::Epilogue Epilogue;
306  Epilogue epilogue(params.epilogue, shared_storage.epilogue, params.m, params.n);
307  epilogue.epilogue(cutlass::make_Coord(0, block.y, block.x), accumulators);
308  }
309 
311  Params const& params;
314 };
315 
317 
318 } // namespace gemm
319 } // namespace cutlass
Definition: gemm.h:56
+
Definition: convert.h:33
+
SharedStorage & shared_storage
The shared storage.
Definition: gemm.h:313
+
Traits::Epilogue::ScalarD ScalarD
The scalar for D.
Definition: gemm.h:99
+
Scalar_ beta
Definition: gemm.h:60
+
Index_ k
Definition: gemm.h:58
+
Traits::SharedStorage SharedStorage
The shared storage.
Definition: gemm.h:88
+
The params.
Definition: gemm.h:107
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
Params const & params
The params.
Definition: gemm.h:311
+
Index_ m
The dimensions of the GEMM.
Definition: gemm.h:58
+
Traits::Epilogue::ScalarC ScalarC
The scalar for C.
Definition: gemm.h:97
+
Index_ ldb
The stride for B.
Definition: gemm.h:68
+
C++ features that may be otherwise unimplemented for CUDA device functions.
+
CUTLASS_DEVICE void multiply_add()
Do the GEMM.
Definition: gemm.h:197
+
GemmTraits_ Traits
The traits.
Definition: gemm.h:86
+
Traits::Epilogue::Scalar ScalarEpilogue
The scalar in the epilogue.
Definition: gemm.h:95
+
Index_ n
Definition: gemm.h:58
+
Traits::ScalarB ScalarB
The scalar for B.
Definition: gemm.h:93
+
Definition: clear_accumulators.h:38
+
void * d_d
The destination matrix D.
Definition: gemm.h:74
+
Definition: gemm.h:82
+
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:60
+
static __host__ cudaError_t launch(CUfunction kernel, Params const &params, CUstream stream=CU_STREAM_LEGACY)
Launch the kernel.
Definition: gemm.h:164
+
void const * d_a
The source matrix A.
Definition: gemm.h:62
+
__global__ void gemm_kernel(typename Gemm_::Params params)
Definition: gemm.h:43
+
CUTLASS_HOST_DEVICE int initialize(Index m, Index n, Index k, ScalarEpilogue alpha, ScalarA const *d_a, Index lda, ScalarB const *d_b, Index ldb, ScalarEpilogue beta, ScalarC const *d_c, Index ldc, ScalarD *d_d, Index ldd)
Definition: gemm.h:108
+
Index_ lda
The stride for A.
Definition: gemm.h:64
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
Gemm< GemmTraits_ > This_
This class.
Definition: gemm.h:84
+
Index_ ldc
The stride for C.
Definition: gemm.h:72
+
CUTLASS_DEVICE Gemm(Params const &params_, SharedStorage &shared_storage_)
Ctor.
Definition: gemm.h:193
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Index_ ldd
The stride for D.
Definition: gemm.h:76
+
Traits::ScalarA ScalarA
The scalar for A.
Definition: gemm.h:91
+
CUTLASS_DEVICE void clear(Fragment_ &fragment)
Clear the fragment.
Definition: clear_accumulators.h:47
+
static int const kThreads
The number of threads.
Definition: gemm.h:104
+
Scalar_ alpha
The alpha/beta scaling values.
Definition: gemm.h:60
+
void const * d_c
The source matrix C.
Definition: gemm.h:70
+
static __host__ cudaError_t launch(Params const &params, cudaStream_t stream=cudaStreamDefault)
Launch the kernel.
Definition: gemm.h:141
+
Traits::Index Index
The index.
Definition: gemm.h:101
+
void const * d_b
The source matrix B.
Definition: gemm.h:66
+
+ + + + diff --git a/docs/generated-html/gemm__epilogue_8h.html b/docs/generated-html/gemm__epilogue_8h.html new file mode 100644 index 00000000..ad65bf56 --- /dev/null +++ b/docs/generated-html/gemm__epilogue_8h.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: gemm_epilogue.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_epilogue.h File Reference
+
+
+ +

Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product. +More...

+
#include <cutlass/convert.h>
+#include <cutlass/coord.h>
+#include <cutlass/fragment.h>
+
+

Go to the source code of this file.

+ + + + +

+Classes

struct  cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+ + + + + + +

+Functions

template<typename T >
CUTLASS_DEVICE bool cutlass::gemm::is_zero (T x)
 
CUTLASS_DEVICE bool cutlass::gemm::is_zero (half x)
 
+
+ + + + diff --git a/docs/generated-html/gemm__epilogue_8h_source.html b/docs/generated-html/gemm__epilogue_8h_source.html new file mode 100644 index 00000000..c7be2683 --- /dev/null +++ b/docs/generated-html/gemm__epilogue_8h_source.html @@ -0,0 +1,130 @@ + + + + + + + +Cutlass: gemm_epilogue.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_epilogue.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
30 #pragma once
31 
32 #include <cutlass/convert.h>
33 #include <cutlass/coord.h>
34 #include <cutlass/fragment.h>
35 
36 namespace cutlass {
37 namespace gemm {
38 
40 
41 template <typename T>
42 CUTLASS_DEVICE bool is_zero(T x) {
43  return x == T(0);
44 }
45 
46 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
47 CUTLASS_DEVICE bool is_zero(half x) { return reinterpret_cast<int16_t&>(x) == int16_t(0); }
48 #endif
49 
51 
52 template <typename GemmEpilogueTraits_>
53 struct GemmEpilogue {
55  typedef GemmEpilogueTraits_ Traits;
57  typedef typename Traits::Params Params;
59  typedef typename Traits::SharedStorage SharedStorage;
60 
62  typedef typename Traits::OutputTile OutputTile;
64  typedef typename Traits::Iterations Iterations;
66  typedef typename Traits::Accumulators Accumulators;
68  typedef typename Traits::Scalar Scalar;
70  typedef typename Traits::Functor Functor;
71 
73  static_assert(Iterations::kD == 1 && Iterations::kC == 1, "Unsupported 3D/4D shapes");
74 
76  typedef typename Traits::GlobalLoadIteratorC GlobalLoadIteratorC;
78  typedef typename Traits::GlobalTransformerC GlobalTransformerC;
80  typedef typename Traits::GlobalTransformerD GlobalTransformerD;
82  typedef typename Traits::GlobalStoreIteratorD GlobalStoreIteratorD;
84  typedef typename Traits::SharedStoreIteratorD SharedStoreIteratorD;
86  typedef typename Traits::SharedStoreTransformerD SharedStoreTransformerD;
88  typedef typename Traits::SharedLoadIteratorD SharedLoadIteratorD;
91 
93  typedef typename Traits::Index Index;
94 
96  typedef typename GlobalLoadIteratorC::Scalar ScalarC;
98  typedef typename GlobalStoreIteratorD::Scalar ScalarD;
99 
101  CUTLASS_DEVICE GemmEpilogue(Params const& params_,
102  SharedStorage& shared_storage_,
103  Index m_,
104  Index n_)
105  : params(params_), shared_storage(shared_storage_), m(m_), n(n_) {}
106 
108  CUTLASS_DEVICE void epilogue(Coord<3> const& block, Accumulators& accumulators) {
109  if (is_zero(params.functor.beta)) {
110  epilogue_with_or_without_beta<true>(block, accumulators);
111  } else {
112  epilogue_with_or_without_beta<false>(block, accumulators);
113  }
114  }
115 
116  template <bool kBetaIsZero_>
117  CUTLASS_DEVICE void epilogue_with_or_without_beta(Coord<3> const& block,
118  Accumulators& accumulators) {
119 
120  Coord<3> const bounds = cutlass::make_Coord(0, n, m);
121 
122  // The functor.
123  Functor functor(params.functor);
124  // The C fragment.
125  typename GlobalLoadIteratorC::Fragment fragment_c;
126  // The transformed C fragment.
127  typename GlobalTransformerC::OutputFragment transformed_c;
128 
130  for (int h = 0; h < Iterations::kH; ++h) {
131  // Compute pointer and predicate offsets for C and D global iterators.
132  int const pointer_offset =
133  ((params.iterator_d.inc_h * (GlobalStoreIteratorD::Iterations::kH - 1) +
134  params.iterator_d.inc_advance) *
135  Iterations::kW +
136  params.stride_h) *
137  h;
138  int const predicate_offset =
139  ((params.iterator_d.predicate_inc_h * (GlobalStoreIteratorD::Iterations::kH - 1) +
140  params.iterator_d.predicate_inc_advance) *
141  Iterations::kW +
142  Traits::Delta::kH) *
143  h;
144 
145  // The iterator to load the elements of the C matrix.
146  GlobalLoadIteratorC global_load_iterator(
147  params.iterator_c, bounds, block, pointer_offset, predicate_offset);
148  // The transformer for C.
149  GlobalTransformerC transformer_c;
150  // The transformer for D.
151  GlobalTransformerD transformer_d;
152  // The iterator to store into the D matrix.
153  GlobalStoreIteratorD global_store_iterator(
154  params.iterator_d, bounds, block, pointer_offset, predicate_offset);
155 
157  for (int w = 0; w < Iterations::kW; ++w) {
158  // Load the C matrix into fragment.
159  if (!kBetaIsZero_) {
160  iterator_load(global_load_iterator, fragment_c);
161  }
162 
163  // Make sure we can write to shared memory.
165 
166  // Copy the accumulators to shared memory.
167  int const offset = (h * Iterations::kW + w) * SharedStoreIteratorD::Fragment::kElements;
168 
169  SharedStoreTransformerD shared_store_transformer;
170  typename SharedStoreTransformerD::OutputFragment shared_store_transformed_d;
171  shared_store_transformer.transform(accumulators, offset, shared_store_transformed_d);
172 
173  SharedStoreIteratorD shared_store_iterator(params.shared_store_iterator_d,
174  shared_storage.shared_stream.store);
175  shared_iterator_store(shared_store_iterator, shared_store_transformed_d);
176 
177  // Make sure the data is in shared memory.
179 
180  // Copy the accumulators back to registers from shared memory.
181  SharedLoadIteratorD shared_load_iterator(params.shared_load_iterator_d,
182  shared_storage.shared_stream.load);
183  typename SharedLoadIteratorD::Fragment fetched_d;
184  shared_iterator_load(shared_load_iterator, fetched_d);
185 
186  // Do the math.
187  typename GlobalTransformerD::InputFragment fragment_d;
188 
189  if (kBetaIsZero_) {
190  functor.evaluate(fetched_d, fragment_d);
191  } else {
192  // Transform C fragment.
193  transformer_c.transform(fragment_c, transformed_c);
194  // Do the math.
195  functor.evaluate(fetched_d, transformed_c, fragment_d);
196  }
197 
198  // Transform D fragment.
199  typename GlobalTransformerD::OutputFragment transformed_d;
200  transformer_d.transform(fragment_d, transformed_d);
201 
202  // Copy the results to global memory.
203  iterator_store(global_store_iterator, transformed_d);
204  }
205  }
206  }
207 
209  CUTLASS_DEVICE void shared_load_fence() { __syncthreads(); }
210 
212  CUTLASS_DEVICE void shared_store_fence() { __syncthreads(); }
213 
215  Params const& params;
220 };
221 
223 
224 } // namespace gemm
225 } // namespace cutlass
GlobalStoreIteratorD::Scalar ScalarD
The scalar for D.
Definition: gemm_epilogue.h:98
+
Traits::SharedStoreIteratorD SharedStoreIteratorD
The iterator to store D in shared memory.
Definition: gemm_epilogue.h:84
+
Definition: convert.h:33
+
CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from a shared memory input iterator.
Definition: iterator_access.h:75
+
Traits::Params Params
The params.
Definition: gemm_epilogue.h:57
+
Definition: gemm_epilogue.h:53
+
CUTLASS_DEVICE void epilogue_with_or_without_beta(Coord< 3 > const &block, Accumulators &accumulators)
Definition: gemm_epilogue.h:117
+
CUTLASS_DEVICE GemmEpilogue(Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)
Ctor.
Definition: gemm_epilogue.h:101
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
Definition: convert.h:69
+
Traits::SharedStorage SharedStorage
The shared storage.
Definition: gemm_epilogue.h:59
+
Traits::GlobalTransformerD GlobalTransformerD
The transformer for D.
Definition: gemm_epilogue.h:80
+
Traits::OutputTile OutputTile
The output tile.
Definition: gemm_epilogue.h:62
+
Traits::Accumulators Accumulators
The accumulators.
Definition: gemm_epilogue.h:66
+
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:60
+
CUTLASS_DEVICE void shared_load_fence()
The memory fence for shared loads.
Definition: gemm_epilogue.h:209
+
SharedStorage & shared_storage
The shared storage.
Definition: gemm_epilogue.h:217
+
GemmEpilogueTraits_ Traits
The traits class.
Definition: gemm_epilogue.h:55
+
CUTLASS_DEVICE bool is_zero(T x)
Definition: gemm_epilogue.h:42
+
Params const & params
The params.
Definition: gemm_epilogue.h:215
+
Traits::SharedLoadIteratorD SharedLoadIteratorD
The iterator to load D in shared memory.
Definition: gemm_epilogue.h:88
+
Traits::Index Index
The index.
Definition: gemm_epilogue.h:93
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Traits::SharedStoreTransformerD SharedStoreTransformerD
The shared store transformer for D.
Definition: gemm_epilogue.h:86
+
CUTLASS_DEVICE void shared_iterator_store(OutputIterator &iterator, Fragment const &fragment)
Stores a fragment to a shared memory output iterator.
Definition: iterator_access.h:228
+
Traits::GlobalStoreIteratorD GlobalStoreIteratorD
The iterator for D in global memory.
Definition: gemm_epilogue.h:82
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment)
Stores a fragment to an output iterator.
Definition: iterator_access.h:193
+
GlobalLoadIteratorC::Scalar ScalarC
The scalar for C.
Definition: gemm_epilogue.h:96
+
Index n
Definition: gemm_epilogue.h:219
+
CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from an input iterator.
Definition: iterator_access.h:41
+
Traits::Functor Functor
The functor in charge of the math.
Definition: gemm_epilogue.h:70
+
Traits::Iterations Iterations
The number of iterations.
Definition: gemm_epilogue.h:64
+
CUTLASS_DEVICE void epilogue(Coord< 3 > const &block, Accumulators &accumulators)
Execute the epilogue.
Definition: gemm_epilogue.h:108
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
Copy< typename SharedLoadIteratorD::Fragment > SharedLoadTransformerD
The shared load transformer for D.
Definition: gemm_epilogue.h:90
+
Traits::Scalar Scalar
The scalar.
Definition: gemm_epilogue.h:68
+
Defines conversion operations among Fragments of different base type.
+
Index m
The dimensions of the GEMM.
Definition: gemm_epilogue.h:219
+
CUTLASS_DEVICE void shared_store_fence()
The memory fence for shared stores.
Definition: gemm_epilogue.h:212
+
Traits::GlobalTransformerC GlobalTransformerC
The transformer for C.
Definition: gemm_epilogue.h:78
+
Traits::GlobalLoadIteratorC GlobalLoadIteratorC
The iterator for C in global memory.
Definition: gemm_epilogue.h:76
+
+ + + + diff --git a/docs/generated-html/gemm__epilogue__traits_8h.html b/docs/generated-html/gemm__epilogue__traits_8h.html new file mode 100644 index 00000000..cdb98636 --- /dev/null +++ b/docs/generated-html/gemm__epilogue__traits_8h.html @@ -0,0 +1,128 @@ + + + + + + + +Cutlass: gemm_epilogue_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_epilogue_traits.h File Reference
+
+
+ +

Defines structural properties of the GEMM epilogue. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
 
struct  cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
 The params. More...
 
union  cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage
 The shared memory storage to exchange data. More...
 
struct  cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage
 The shared memory to swizzle the data in the epilogue. More...
 
struct  cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
 
struct  cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__epilogue__traits_8h_source.html b/docs/generated-html/gemm__epilogue__traits_8h_source.html new file mode 100644 index 00000000..3e10e801 --- /dev/null +++ b/docs/generated-html/gemm__epilogue__traits_8h_source.html @@ -0,0 +1,160 @@ + + + + + + + +Cutlass: gemm_epilogue_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_epilogue_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/convert.h>
31 #include <cutlass/coord.h>
35 #include <cutlass/reshape_tile.h>
36 #include <cutlass/tile_iterator.h>
37 
38 namespace cutlass {
39 namespace gemm {
40 
42 
43 template <
45  typename OutputTile_,
47  typename Accumulators_,
49  typename GlobalLoadIteratorC_,
51  typename GlobalTransformerC_,
53  typename GlobalTransformerD_,
55  typename GlobalStoreIteratorD_,
57  typename SharedStoreIteratorD_,
59  typename SharedStoreTransformerD_,
61  typename SharedLoadIteratorD_,
63  typename Iterations_,
65  typename Delta_,
67  typename Functor_,
69  typename Index_ = int>
71  //
73  typedef OutputTile_ OutputTile;
76  typedef Accumulators_ Accumulators;
78  typedef GlobalLoadIteratorC_ GlobalLoadIteratorC;
80  typedef GlobalTransformerC_ GlobalTransformerC;
82  typedef GlobalTransformerD_ GlobalTransformerD;
84  typedef GlobalStoreIteratorD_ GlobalStoreIteratorD;
86  typedef SharedStoreIteratorD_ SharedStoreIteratorD;
88  typedef SharedStoreTransformerD_ SharedStoreTransformerD;
90  typedef SharedLoadIteratorD_ SharedLoadIteratorD;
92  typedef Iterations_ Iterations;
94  typedef Delta_ Delta;
95 
97  typedef Functor_ Functor;
99  typedef Index_ Index;
100 
102  static_assert(Iterations::kD == 1 && Iterations::kC == 1, "Unsupported 3D/4D shapes");
103 
105  typedef typename Functor::Scalar Scalar;
107  typedef typename GlobalLoadIteratorC::Scalar ScalarC;
109  typedef typename GlobalStoreIteratorD::Scalar ScalarD;
110 
112  struct Params {
116  typename GlobalLoadIteratorC::Params iterator_c;
118  typename GlobalStoreIteratorD::Params iterator_d;
120  typename SharedStoreIteratorD::Params shared_store_iterator_d;
122  typename SharedLoadIteratorD::Params shared_load_iterator_d;
124  typename Functor::Params functor;
125 
127  template <typename GemmDesc_>
128  CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) {
129  // The parameters for the functor.
130  int error_code = functor.initialize(desc);
131  if (error_code) {
132  return error_code;
133  }
134 
135  // At the end of the H iteration, we jump over a number of columns.
136  this->stride_h = desc.ldd * Delta::kH;
137  // Nothing to do here.
138  this->stride_w = 0;
139 
140  // Setup the params for the global memory iterator for C.
141  error_code = iterator_c.initialize(
142  reinterpret_cast<ScalarC const*>(desc.d_c), desc.ldc, desc.n, stride_w, Delta::kW);
143  if (error_code) {
144  return error_code;
145  }
146 
147  // Setup the params for the global memory iterator for D.
148  return iterator_d.initialize(
149  reinterpret_cast<ScalarD*>(desc.d_d), desc.ldd, desc.n, stride_w, Delta::kW);
150  }
151  };
152 
155  // The storage for the store iterator.
156  typename SharedStoreIteratorD::SharedStorage store;
157  // The storage for the load iterator.
158  typename SharedLoadIteratorD::SharedStorage load;
159  };
160 
162  struct SharedStorage {
163  // The storage for the shared stream D.
165  };
166 };
167 
169 
170 template <typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
173  typedef typename EpilogueFunctor_::Scalar Scalar;
175  typedef typename GemmConfig_::OutputTile OutputTile;
176 
178  typedef Shape<1,
179  GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH /
180  GemmConfig_::kAccumulatorsPerLdsB,
181  GemmConfig_::kAccumulatorsPerLdsB>
183  // The iteration strides in the H/W dimension.
184  typedef Shape<0,
185  GemmConfig_::kAccumulatorsPerLdsB*(
186  GemmConfig_::Warps::kH* GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1),
187  0>
190  typedef EpilogueFunctor_ Functor;
191 
194  // The pointer is float.
195  typename Functor::Scalar,
196  // The output tile size.
197  typename GemmConfig_::OutputTile,
198  // The number of warps.
199  typename GemmConfig_::Warps,
200  // The number of threads per warp.
201  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
202  // The number of scalars per STS.
203  GemmConfig_::kScalarsPerStsD,
204  // The skew -- 128 / sizeof(ScalarD) / kScalarsPerStsD is the number of threads involved in
205  // a single STS. We divide by 2 as our objective is to add a skew to the odd threads to
206  // avoid bank conflicts between odd and even threads.
207  128 / sizeof(typename GemmConfig_::ScalarD) / GemmConfig_::kScalarsPerStsD / 2 *
208  GemmConfig_::kScalarsPerStsD>
210 
217 
220 
223  // The pointer is float.
224  typename Functor::Scalar,
225  // The output tile size.
226  typename GemmConfig_::OutputTile,
227  // The number of warps.
228  typename GemmConfig_::Warps,
229  // The number of threads per warp.
230  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
231  // The number of columns of the output tile written by iteration.
232  GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount,
233  // The number of scalars per LDS.
234  GemmConfig_::kScalarsPerLdsD,
235  // The skew.
238 
245 
247  typedef GemmGlobalTileCdTraits<
248  // The pointer is float const.
249  typename GemmConfig_::ScalarC const,
250  // The tile has size (N / Iterations)xM in GEMM's terminology.
251  Shape<1,
252  GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount,
253  GemmConfig_::OutputTile::kW>,
254  // The threads are distributed as warps x 32 (the traits may reorganize).
256  // How many elements do we jump over at each iteration?
258  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
259  GemmConfig_::kScalarsPerLdgC>
261 
266 
268  typedef GemmGlobalTileCdTraits<
269  // The pointer is float.
270  typename GemmConfig_::ScalarD,
271  // The tile has size (N / Iterations)xM in GEMM's terminology.
272  Shape<1,
273  GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount,
274  GemmConfig_::OutputTile::kW>,
275  // The threads are distributed as warps x 32 (the traits may reorganize).
277  // How many elements do we jump over at each iteration?
279  // The number of scalars per STG (STG.32 or STG.128, etc).
280  GemmConfig_::kScalarsPerStgD>
282 
287 };
288 
290 
291 template <
293  typename GemmConfig_,
295  typename EpilogueFunctor_,
297  typename Index_ = int,
301  // The output tile.
302  typename GemmConfig_::OutputTile,
303  // The accumulators.
304  typename GemmConfig_::Accumulators,
305  // The global iterator for C.
306  typename Helper_::GlobalLoadIteratorC,
307  // The transformer for C.
308  typename Helper_::GlobalTransformerC,
309  // The transformer for D.
310  typename Helper_::GlobalTransformerD,
311  // The global iterator for D.
312  typename Helper_::GlobalStoreIteratorD,
313  // The iterator to store D to shared memory.
314  typename Helper_::SharedStoreIteratorD,
315  // The shared store transformer for D.
316  typename Helper_::SharedStoreTransformerD,
317  // The iterator to load D from shared memory.
318  typename Helper_::SharedLoadIteratorD,
319  // The number of iterations.
320  typename Helper_::Iterations,
321  // The strides between iterations.
322  typename Helper_::Delta,
323  // The functor to be used in the epilogue.
324  EpilogueFunctor_,
325  // The index.
326  Index_> {};
327 
329 
330 } // namespace gemm
331 } // namespace cutlass
Definition: gemm_global_tile.h:116
+
SharedLoadIteratorD::SharedStorage load
Definition: gemm_epilogue_traits.h:158
+
Delta_ Delta
The iterations strides.
Definition: gemm_epilogue_traits.h:94
+
Definition: load_store.h:42
+
Definition: convert.h:33
+
GemmGlobalTileCdTraits< typename GemmConfig_::ScalarC const, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerLdgC > GlobalLoadTileTraits
The traits class to build the iterator to load data from global memory for C^N.
Definition: gemm_epilogue_traits.h:260
+
Definition: gemm_epilogue_traits.h:171
+
Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
+
GlobalTransformerC_ GlobalTransformerC
The transformer for C.
Definition: gemm_epilogue_traits.h:80
+
GlobalStoreIteratorD::Params iterator_d
The params for the D global iterator.
Definition: gemm_epilogue_traits.h:118
+
Implements the BLAS linear scaling function alpha*AB + beta*C.
+
The shared memory storage to exchange data.
Definition: gemm_epilogue_traits.h:154
+
GlobalLoadIteratorC::Scalar ScalarC
The scalar for C.
Definition: gemm_epilogue_traits.h:107
+
EpilogueFunctor_::Scalar Scalar
The scalar.
Definition: gemm_epilogue_traits.h:173
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
Index stride_h
The strides for H and W in the different iterations of the epilogue.
Definition: gemm_epilogue_traits.h:114
+
Index_ Index
The index.
Definition: gemm_epilogue_traits.h:99
+
GlobalStoreIteratorD_ GlobalStoreIteratorD
The iterator for D in global memory.
Definition: gemm_epilogue_traits.h:84
+
Definition: convert.h:69
+
OutputTile_ OutputTile
The output tile.
Definition: gemm_epilogue_traits.h:73
+
GemmGlobalIteratorCd< GlobalLoadTileTraits, Index_ > GlobalLoadIteratorC
The iterator to load C.
Definition: gemm_epilogue_traits.h:263
+
Definition: tile_iterator.h:62
+
GlobalStoreIteratorD::Scalar ScalarD
The scalar for D.
Definition: gemm_epilogue_traits.h:109
+
TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorD
The iterator to store D to shared memory.
Definition: gemm_epilogue_traits.h:216
+
GemmGlobalIteratorCd< GlobalStoreTileTraits, Index_ > GlobalStoreIteratorD
The iterator to store D.
Definition: gemm_epilogue_traits.h:284
+
Copy< typename SharedStoreIteratorD::Fragment > SharedStoreTransformerD
The shared store transformer for D.
Definition: gemm_epilogue_traits.h:219
+
Shape< 1, GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH/GemmConfig_::kAccumulatorsPerLdsB, GemmConfig_::kAccumulatorsPerLdsB > Iterations
The number of iterations in the epilogue.
Definition: gemm_epilogue_traits.h:182
+
GlobalLoadIteratorC::Params iterator_c
The params for the C iterator.
Definition: gemm_epilogue_traits.h:116
+
SharedStoreTransformerD_ SharedStoreTransformerD
The shared store transformer for D.
Definition: gemm_epilogue_traits.h:88
+
CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc)
Setup the params.
Definition: gemm_epilogue_traits.h:128
+
GemmGlobalTileCdTraits< typename GemmConfig_::ScalarD, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerStgD > GlobalStoreTileTraits
The traits class to build the iterator to store data to global memory for D^N.
Definition: gemm_epilogue_traits.h:281
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+
SharedStoreIteratorD_ SharedStoreIteratorD
The iterator to store D in shared memory.
Definition: gemm_epilogue_traits.h:86
+
SharedStoreIteratorD::SharedStorage store
Definition: gemm_epilogue_traits.h:156
+
GemmSharedStoreTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::kScalarsPerStsD, 128/sizeof(typename GemmConfig_::ScalarD)/GemmConfig_::kScalarsPerStsD/2 *GemmConfig_::kScalarsPerStsD > SharedStoreTileTraits
The traits class to build the iterator to store to shared memory for D.
Definition: gemm_epilogue_traits.h:209
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:278
+
Defines a type for restructuring a tile.
+
Iterations_ Iterations
typedef typename GemmConfig::EpilogueIterations Iterations;
Definition: gemm_epilogue_traits.h:92
+
Definition: gemm_shared_tile.h:335
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Definition: gemm_epilogue_traits.h:300
+
StreamSharedStorage shared_stream
Definition: gemm_epilogue_traits.h:164
+
EpilogueFunctor_ Functor
The functor to do the math in the epilogue.
Definition: gemm_epilogue_traits.h:190
+
TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorD
The iterator to load D from shared memory.
Definition: gemm_epilogue_traits.h:244
+
GemmConfig_::OutputTile OutputTile
The output tile.
Definition: gemm_epilogue_traits.h:175
+
GlobalLoadIteratorC_ GlobalLoadIteratorC
The iterator for C in global memory.
Definition: gemm_epilogue_traits.h:78
+
GlobalTransformerD_ GlobalTransformerD
The transformer for D.
Definition: gemm_epilogue_traits.h:82
+
Definition: gemm_epilogue_traits.h:70
+
Definition: gemm_global_tile.h:348
+
Index stride_w
Definition: gemm_epilogue_traits.h:114
+
static int const kW
The width of the cube.
Definition: shape.h:70
+
Functor::Scalar Scalar
We do not support 3D or 4D shapes.
Definition: gemm_epilogue_traits.h:102
+
Copy< typename GlobalStoreIteratorD::Fragment > GlobalTransformerD
The transformer for D.
Definition: gemm_epilogue_traits.h:286
+
Implements efficient loading of the thread block-level tile from global memory and storing to shared ...
+
The params.
Definition: gemm_epilogue_traits.h:112
+
The shared memory to swizzle the data in the epilogue.
Definition: gemm_epilogue_traits.h:162
+
SharedLoadIteratorD_ SharedLoadIteratorD
The iterator to load D from shared memory.
Definition: gemm_epilogue_traits.h:90
+
Functor::Params functor
The functor params.
Definition: gemm_epilogue_traits.h:124
+
Copy< typename GlobalLoadIteratorC::Fragment > GlobalTransformerC
The transformer for C.
Definition: gemm_epilogue_traits.h:265
+
SharedLoadIteratorD::Params shared_load_iterator_d
The params for the D shared load iterator.
Definition: gemm_epilogue_traits.h:122
+
GemmSharedLoadTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::kScalarsPerLdsD, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for D.
Definition: gemm_epilogue_traits.h:237
+
Accumulators_ Accumulators
Definition: gemm_epilogue_traits.h:76
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:266
+
Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEM...
+
Computes derived counts of a Layout Concept based class.
Definition: shape.h:79
+
Defines conversion operations among Fragments of different base type.
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:337
+
SharedStoreIteratorD::Params shared_store_iterator_d
The params for the D shared store iterator.
Definition: gemm_epilogue_traits.h:120
+
Functor_ Functor
The functor in charge of the math.
Definition: gemm_epilogue_traits.h:97
+
Definition: gemm_shared_tile.h:264
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:620
+
+ + + + diff --git a/docs/generated-html/gemm__fragment__stream_8h.html b/docs/generated-html/gemm__fragment__stream_8h.html new file mode 100644 index 00000000..6c4bbdec --- /dev/null +++ b/docs/generated-html/gemm__fragment__stream_8h.html @@ -0,0 +1,119 @@ + + + + + + + +Cutlass: gemm_fragment_stream.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_fragment_stream.h File Reference
+
+
+ +

GEMM Fragment Stream maps the dimensions of the GEMM problem to the generic fragment stream. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
 Defines a FragmentStream by mapping GEMM dimensions onto contiguous and strided dimensions. More...
 
struct  cutlass::gemm::GemmFragmentStream< Traits_ >
 GEMM Fragment Stream. More...
 
struct  cutlass::gemm::GemmFragmentStream< Traits_ >::Params
 Parameters object. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__fragment__stream_8h_source.html b/docs/generated-html/gemm__fragment__stream_8h_source.html new file mode 100644 index 00000000..db383ae4 --- /dev/null +++ b/docs/generated-html/gemm__fragment__stream_8h_source.html @@ -0,0 +1,148 @@ + + + + + + + +Cutlass: gemm_fragment_stream.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_fragment_stream.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
31 #include <cutlass/matrix_traits.h>
32 
35 
36 namespace cutlass {
37 namespace gemm {
38 
40 
42 template <GemmOperand::Kind Usage, typename Scalar_, MatrixLayout::Kind Layout,
43  typename ThreadBlockTile_, int Threads, int ScalarsPerInst, typename Index_ = int,
44  typename DestinationSkew_ = Shape<0, 0, 0, 0> >
47  static GemmOperand::Kind const kUsage = Usage;
48 
50  typedef Scalar_ Scalar;
51 
53  static MatrixLayout::Kind const kLayout = Layout;
54 
56  typedef ThreadBlockTile_ ThreadBlockTile;
57 
59  static int const kThreads = Threads;
60 
62  static int const kAccessSize = ScalarsPerInst;
63 
65  typedef Index_ Index;
66 
68  typedef typename ShapeDiv<DestinationSkew_, Shape<ScalarsPerInst, ScalarsPerInst, ScalarsPerInst,
70 
73 
76 
79 
81  typedef TileTraitsDefault<VectorizedTile, kThreads> TileTraits;
82 
84  typedef FragmentStream<
85  TileTraits,
94 };
95 
97 template <typename Traits_>
99  : public FragmentStream<
100  typename Traits_::TileTraits,
101  TileLoadIterator<typename Traits_::TileTraits, typename Traits_::Scalar,
102  Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH
103  : IteratorAdvance::kW,
104  MemorySpace::kGlobal, typename Traits_::Index>,
105  TileStoreIterator<typename Traits_::TileTraits, typename Traits_::Scalar,
106  Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH
107  : IteratorAdvance::kW,
108  MemorySpace::kShared, typename Traits_::Index, typename Traits_::Scalar,
109  IteratorFragment::kScalar, typename Traits_::DestinationSkew> > {
111  typedef Traits_ Traits;
112 
114  typedef typename Traits::FragmentStream Base;
115 
116  //
117  // FragmentStream concept
118  //
119 
121  typedef typename Traits::Scalar Scalar;
122 
124  typedef typename Base::LoadIterator LoadIterator;
125 
127  typedef typename Base::StoreIterator StoreIterator;
128 
130  typedef typename Base::Convert Convert;
131 
133  typedef typename Base::Fragment Fragment;
134 
136  typedef typename Base::StoreFragment StoreFragment;
137 
139  typedef typename Base::Storage Storage;
140 
141  // Parameters type
142  // typedef typename Base::Params BaseParams;
143 
145  typedef typename Traits::Index Index;
146 
147  //
148  // Nested class definitions
149  //
150 
152  typedef typename Traits::Scalar const *Pointer;
153 
155  struct Params : public Base::Params {
156  //
157  // Methods
158  //
159 
161  template <typename GemmDesc_>
162  CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc,
163  typename Traits::Scalar const *pointer, Index ldm) {
164  return this->load_params.initialize(pointer, ldm * Traits::MultiplicandTraits::Shape::kH, ldm,
165  Traits::kAccessSize);
166  }
167  };
168 
169  //
170  // Static member functions
171  //
172 
174  static CUTLASS_DEVICE void shared_store_fence() { Base::shared_store_fence(); }
175 
176  //
177  // Methods
178  //
179 
180  CUTLASS_DEVICE
182 
184  CUTLASS_DEVICE
185  GemmFragmentStream(Params const &params, Coord<3> const &bounds,
186  Coord<3> const &block_offset = make_Coord(0, 0, 0))
187  : Base(params, ProjectOperand<Traits::kUsage, Traits::MultiplicandTraits::kKstrided>::project(
188  bounds) +
189  make_Coord(1, 0, 0),
190  ProjectOperand<Traits::kUsage, Traits::MultiplicandTraits::kKstrided>::project(
191  block_offset)) {}
192 
194  CUTLASS_DEVICE
195  void load() { Base::load(); }
196 
198  CUTLASS_DEVICE
199  void commit() { Base::commit(); }
200 
202  CUTLASS_DEVICE
203  void residue(Coord<3> const &bounds, Coord<3> const &block_offset) {
204  this->initialize_predicates(bounds, block_offset);
205 
206  this->fetch.clear();
207  }
208 
210  CUTLASS_DEVICE
211  void initialize_predicates(Coord<3> const &bounds, Coord<3> const &block_offset) {
212  Base::initialize_predicates(
214  make_Coord(1, 0, 0),
216  block_offset));
217  }
218 };
219 
221 }
222 }
nv_std::conditional< kKstrided, Shape< 1, ThreadBlockTile::kD, GetExtent< Usage, ThreadBlockTile >::kExtent >, Shape< 1, GetExtent< Usage, ThreadBlockTile >::kExtent, ThreadBlockTile::kD > >::type Shape
Map the ThreadBlockShape onto (kH, kW) dimensions for A and B operand.
Definition: gemm_operand.h:86
+
static bool const kKstrided
Definition: gemm_operand.h:81
+
Scalar_ Scalar
Scalar data type.
Definition: gemm_fragment_stream.h:50
+
GemmMultiplicandTraits< ThreadBlockTile, kUsage, kLayout > MultiplicandTraits
Traits of multiplicand.
Definition: gemm_fragment_stream.h:72
+
static int const kAccessSize
Scalars per instruction.
Definition: gemm_fragment_stream.h:62
+
Definition: load_store.h:42
+
Definition: convert.h:34
+
Base::StoreIterator StoreIterator
Defines the store iterator.
Definition: gemm_fragment_stream.h:127
+
CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc, typename Traits::Scalar const *pointer, Index ldm)
Initializes parameters.
Definition: gemm_fragment_stream.h:162
+
Defines structural properties of complete GEMM computation.
+
Traits::FragmentStream Base
Base class.
Definition: gemm_fragment_stream.h:114
+
An abstraction for implementing a stream loading a tile and storing a tile using a pair of tile itera...
+
Definition: load_store.h:43
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
ReshapeTile< ScalarTile, kAccessSize >::Tile VectorizedTile
Reshape for vectorized access.
Definition: gemm_fragment_stream.h:78
+
Traits::Index Index
Index type.
Definition: gemm_fragment_stream.h:145
+
FragmentStream< TileTraits, TileLoadIterator< TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index >, TileStoreIterator< TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Index, Scalar, IteratorFragment::kScalar, DestinationSkew > > FragmentStream
Define the tile stream.
Definition: gemm_fragment_stream.h:93
+
Traits_ Traits
Traits.
Definition: gemm_fragment_stream.h:111
+ +
Definition: tile_iterator.h:97
+
CUTLASS_DEVICE GemmFragmentStream()
Definition: gemm_fragment_stream.h:181
+
TileTraitsDefault< VectorizedTile, kThreads > TileTraits
Define structure of stripmined tile.
Definition: gemm_fragment_stream.h:81
+
MultiplicandTraits::Shape ScalarTile
Scalar tile shape.
Definition: gemm_fragment_stream.h:75
+
static CUTLASS_DEVICE void shared_store_fence()
The memory fence for shared stores.
Definition: gemm_fragment_stream.h:174
+
Defines a FragmentStream by mapping GEMM dimensions onto contiguous and strided dimensions.
Definition: gemm_fragment_stream.h:45
+
CUTLASS_DEVICE GemmFragmentStream(Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
Constructor - bounds and block offset are aligned to GEMM coordinates (K, N, M)
Definition: gemm_fragment_stream.h:185
+
Base::Fragment Fragment
Loaded fragment type.
Definition: gemm_fragment_stream.h:133
+
GEMM Fragment Stream.
Definition: gemm_fragment_stream.h:98
+
Traits::Scalar const * Pointer
The pointer.
Definition: gemm_fragment_stream.h:152
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:308
+
CUTLASS_DEVICE void commit()
Commits the fragment.
Definition: gemm_fragment_stream.h:199
+
Base::Storage Storage
Destination storage.
Definition: gemm_fragment_stream.h:139
+
Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
+
Manages a pair of iterators to stream data from global memory to shared.
Definition: fragment_stream.h:50
+
Definition: gemm_operand.h:66
+
static MatrixLayout::Kind const kLayout
Layout of the operand.
Definition: gemm_fragment_stream.h:53
+
Traits::Scalar Scalar
Scalar type.
Definition: gemm_fragment_stream.h:121
+
Parameters object.
Definition: gemm_fragment_stream.h:155
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:37
+
Index_ Index
Index type.
Definition: gemm_fragment_stream.h:65
+
Definition: shape.h:124
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:63
+
Base::Convert Convert
Converts between tiles.
Definition: gemm_fragment_stream.h:130
+
Definition: gemm_operand.h:94
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
static int const kThreads
Number of threads.
Definition: gemm_fragment_stream.h:59
+
Kind
Definition: matrix_traits.h:36
+
Base::StoreFragment StoreFragment
Stored fragment type.
Definition: gemm_fragment_stream.h:136
+
Base::LoadIterator LoadIterator
Defines the load iterator.
Definition: gemm_fragment_stream.h:124
+
static GemmOperand::Kind const kUsage
Indicates identity of multiplicand.
Definition: gemm_fragment_stream.h:47
+
Tile_ Tile
Definition: tile.h:43
+
Definition: tile_iterator.h:97
+
ShapeDiv< DestinationSkew_, Shape< ScalarsPerInst, ScalarsPerInst, ScalarsPerInst, 1 > >::Shape DestinationSkew
Skew added to shared memory tile.
Definition: gemm_fragment_stream.h:69
+
Kind
Definition: matrix_traits.h:43
+
CUTLASS_DEVICE void residue(Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
TODO - Recomputes predicates and clears fetch registers.
Definition: gemm_fragment_stream.h:203
+
ThreadBlockTile_ ThreadBlockTile
Shape of the thread block tile (K, N, M)
Definition: gemm_fragment_stream.h:56
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
CUTLASS_DEVICE void initialize_predicates(Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
Recomputes predicates aligned to GEMM coordinates (K, N, M)
Definition: gemm_fragment_stream.h:211
+
Definition: tile_iterator.h:102
+
CUTLASS_DEVICE void load()
Loads the fragment.
Definition: gemm_fragment_stream.h:195
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:556
+
+ + + + diff --git a/docs/generated-html/gemm__global__stream_8h.html b/docs/generated-html/gemm__global__stream_8h.html new file mode 100644 index 00000000..544d0f8b --- /dev/null +++ b/docs/generated-html/gemm__global__stream_8h.html @@ -0,0 +1,119 @@ + + + + + + + +Cutlass: gemm_global_stream.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_global_stream.h File Reference
+
+
+ +

Implements efficient loading of the thread block-level tile from global memory and storing to shared memory. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
 
struct  cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params
 The params. More...
 
union  cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage
 The storage in shared memory needed by that stream. More...
 
struct  cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__global__stream_8h_source.html b/docs/generated-html/gemm__global__stream_8h_source.html new file mode 100644 index 00000000..4eff93c2 --- /dev/null +++ b/docs/generated-html/gemm__global__stream_8h_source.html @@ -0,0 +1,130 @@ + + + + + + + +Cutlass: gemm_global_stream.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_global_stream.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
30 #pragma once
31 
32 #include <cutlass/convert.h>
35 
36 namespace cutlass {
37 namespace gemm {
38 
40 
41 template <
43  typename LoadIterator_,
45  typename StoreIterator_,
47  typename Transformer_>
48 
51  typedef LoadIterator_ LoadIterator;
53  typedef Transformer_ Transformer;
55  typedef StoreIterator_ StoreIterator;
56 
58  typedef typename LoadIterator::Fragment FetchedFragment;
60  typedef typename Transformer::OutputFragment TransformedFragment;
63  "");
68  "");
69 
71  static MatrixLayout::Kind const kLayout = LoadIterator::kLayout;
73  typedef typename LoadIterator::Scalar Scalar;
75  typedef typename LoadIterator::Pointer Pointer;
77  typedef typename LoadIterator::Index Index;
78 
80  struct Params {
81  // The load iterator.
82  typename LoadIterator::Params load_iterator;
83  // The store iterator.
84  typename StoreIterator::Params store_iterator;
85 
88  int error_code = load_iterator.initialize(pointer, ld);
89  if (error_code) {
90  return error_code;
91  }
92 
93  return store_iterator.initialize();
94  }
95  };
96 
98  typedef typename StoreIterator::SharedStorage SharedStoreStorage;
99 
102  // The load iterator.
103  typename LoadIterator::SharedStorage load_iterator;
104  // The store iterator.
106  };
107 
109  CUTLASS_DEVICE GlobalLoadStreamBase(Params const& params,
110  SharedStorage& shared_storage,
111  Coord<3> const bounds,
112  Coord<3> const& block)
113  : load_iterator(params.load_iterator, bounds, block),
114  transformer(),
115  store_iterator(params.store_iterator, shared_storage.store_iterator)
116 
117  {
118  fetched_fragment.clear();
119  }
120 
122  CUTLASS_DEVICE void copy() { iterator_load(load_iterator, fetched_fragment); }
123 
125  CUTLASS_DEVICE void commit() {
128  store_iterator.inc_stage();
129  }
130 
132  CUTLASS_DEVICE void residue(Index k, bool skip_clear = false) {
133  load_iterator.residue(k);
134  if (!skip_clear) {
135  fetched_fragment.clear();
136  }
137  }
138 
149 };
150 
152 
153 template <
155  typename LoadIterator_,
157  typename StoreIterator_,
159  typename Transformer_ = Copy<typename LoadIterator_::Fragment> >
160 
161 struct GlobalLoadStream : public GlobalLoadStreamBase<LoadIterator_, StoreIterator_, Transformer_> {
164 
166  CUTLASS_DEVICE GlobalLoadStream(typename Base::Params const& params,
167  typename Base::SharedStorage& shared_storage,
168  Coord<3> const& bounds,
169  Coord<3> const& block)
170  : Base(params, shared_storage, bounds, block) {}
171 };
172 
174 } // namespace gemm
175 } // namespace cutlass
static MatrixLayout::Kind const kLayout
Make sure the transformed fragment is the same as the store fragment.
Definition: gemm_global_stream.h:71
+
StoreIterator::Params store_iterator
Definition: gemm_global_stream.h:84
+
Definition: convert.h:33
+
Defines iterators for efficiently loading and storing to global memory.
+
Transformer_ Transformer
The transformer.
Definition: gemm_global_stream.h:53
+
StoreIterator_ StoreIterator
The store iterator to write to shared memory.
Definition: gemm_global_stream.h:55
+
std::is_same (false specialization)
Definition: platform.h:412
+
StoreIterator::SharedStorage SharedStoreStorage
The amount of storage in shared memory needed to store the tile.
Definition: gemm_global_stream.h:98
+
TransformedFragment Fragment
Make sure the fragments match.
Definition: gemm_global_stream.h:63
+
TransformedFragment transformed_fragment
The fragment to convert the data after it has been fetched from global memory.
Definition: gemm_global_stream.h:146
+
CUTLASS_DEVICE void residue(Index k, bool skip_clear=false)
Execute the residue code.
Definition: gemm_global_stream.h:132
+
Definition: convert.h:69
+
CUTLASS_HOST_DEVICE int initialize(Pointer pointer, Index ld)
Setup the params.
Definition: gemm_global_stream.h:87
+
LoadIterator load_iterator
The iterator.
Definition: gemm_global_stream.h:140
+
LoadIterator::Params load_iterator
Definition: gemm_global_stream.h:82
+
Definition: gemm_global_stream.h:161
+
Free functions for loading and storing to implementations of tile iterator concepts.
+
LoadIterator::SharedStorage load_iterator
Definition: gemm_global_stream.h:103
+
CUTLASS_DEVICE GlobalLoadStream(typename Base::Params const &params, typename Base::SharedStorage &shared_storage, Coord< 3 > const &bounds, Coord< 3 > const &block)
Ctor.
Definition: gemm_global_stream.h:166
+
Definition: gemm_global_stream.h:49
+
StoreIterator store_iterator
The store iterator.
Definition: gemm_global_stream.h:148
+
LoadIterator::Pointer Pointer
The pointer.
Definition: gemm_global_stream.h:75
+
SharedStoreStorage store_iterator
Definition: gemm_global_stream.h:105
+
Transformer::OutputFragment TransformedFragment
The fragment that is obtained after the transformation by the transformer.
Definition: gemm_global_stream.h:60
+
LoadIterator::Scalar Scalar
The scalar type of the iterator.
Definition: gemm_global_stream.h:73
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
LoadIterator::Index Index
The index.
Definition: gemm_global_stream.h:77
+
Transformer transformer
The transformer.
Definition: gemm_global_stream.h:144
+
GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > Base
The base class.
Definition: gemm_global_stream.h:163
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
LoadIterator::Fragment FetchedFragment
The fragment that is copied from global memory.
Definition: gemm_global_stream.h:58
+
The storage in shared memory needed by that stream.
Definition: gemm_global_stream.h:101
+
CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment)
Stores a fragment to an output iterator.
Definition: iterator_access.h:193
+
FetchedFragment fetched_fragment
The fragment to fetch from global memory.
Definition: gemm_global_stream.h:142
+
Kind
Definition: matrix_traits.h:36
+
LoadIterator_ LoadIterator
The load iterator.
Definition: gemm_global_stream.h:51
+
CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from an input iterator.
Definition: iterator_access.h:41
+
CUTLASS_DEVICE void commit()
Commit the data.
Definition: gemm_global_stream.h:125
+
CUTLASS_DEVICE void copy()
Load the data from global memory to the fetch fragment.
Definition: gemm_global_stream.h:122
+
CUTLASS_DEVICE GlobalLoadStreamBase(Params const &params, SharedStorage &shared_storage, Coord< 3 > const bounds, Coord< 3 > const &block)
Ctor.
Definition: gemm_global_stream.h:109
+
Defines conversion operations among Fragments of different base type.
+
The params.
Definition: gemm_global_stream.h:80
+
+ + + + diff --git a/docs/generated-html/gemm__global__tile_8h.html b/docs/generated-html/gemm__global__tile_8h.html new file mode 100644 index 00000000..39d4a361 --- /dev/null +++ b/docs/generated-html/gemm__global__tile_8h.html @@ -0,0 +1,136 @@ + + + + + + + +Cutlass: gemm_global_tile.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_global_tile.h File Reference
+
+
+ +

Defines iterators for efficiently loading and storing to global memory. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >
 
struct  cutlass::gemm::ReshapeThreads< Tile_, Threads_, true >
 
struct  cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
 
struct  cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
struct  cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
 
struct  cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
struct  cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
 
struct  cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params
 
struct  cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
 
struct  cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params
 The params. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__global__tile_8h_source.html b/docs/generated-html/gemm__global__tile_8h_source.html new file mode 100644 index 00000000..d44c18ec --- /dev/null +++ b/docs/generated-html/gemm__global__tile_8h_source.html @@ -0,0 +1,215 @@ + + + + + + + +Cutlass: gemm_global_tile.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_global_tile.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/coord.h>
31 #include <cutlass/util/platform.h>
32 
34 #include <cutlass/matrix_traits.h>
36 #include <cutlass/reshape_tile.h>
37 #include <cutlass/tile_iterator.h>
38 
39 namespace cutlass {
40 namespace gemm {
41 
43 
44 // The following functor reshapes a tile of threads to match a tile of data. The idea is that when
45 // the user wants to build the iterator traits, he/she may want to specify the tile independently
46 // from the number of scalars loaded/stored per instruction. For example, in the row-major version
47 // with a tile of size 128x8 - the user may want the iterator to work with 32x8 threads if
48 // each thread loads 1 scalar per LDG. If the user changes to 4 scalars per LDG, then the tile of
49 // threads has to change. The code below detects that and corrects the code automatically - it is
50 // a helper when the user does not specify the right configuration.
51 
52 template <typename Tile_, typename Threads_, bool = (Tile_::kW < Threads_::kW)>
53 struct ReshapeThreads {
54  typedef Threads_ Threads;
55 };
56 
57 template <typename Tile_, typename Threads_>
59  typedef Shape<Threads_::kD, Threads_::kH * Threads_::kW / Tile_::kW, Tile_::kW, 1> Threads;
60 };
61 
63 
64 template <GemmOperand::Kind kOperand_,
65  MatrixLayout::Kind kLayout_,
66  typename Scalar_,
67  typename Tile_,
68  typename Threads_,
69  int kAccessSize_>
72  static GemmOperand::Kind const kOperand = kOperand_;
74  static MatrixLayout::Kind const kLayout = kLayout_;
78  typedef Scalar_* Pointer;
80  static int const kAccessSize = kAccessSize_;
83 
90 
96  typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kAccessSize>
98 
100 
102  struct ThreadOffset {
105  int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH;
106  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
107 
108  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
109  }
110  };
111 };
112 
114 
115 template <typename Scalar_, typename Tile_, typename Threads_, int kStrideH_, int kAccessSize_>
116 struct GemmGlobalTileCdTraits : public GemmGlobalTileTraits<GemmOperand::kC,
117  MatrixLayout::kColumnMajor,
118  Scalar_,
119  Tile_,
120  Threads_,
121  kAccessSize_> {
125  Scalar_,
126  Tile_,
127  Threads_,
128  kAccessSize_>
130 
132  static int const kStrideH = kStrideH_;
135 
136  typedef typename Base::Iterations Iterations;
137 
138  typedef typename Base::Threads Threads;
139 
141 
143 
145  struct ThreadOffset {
148  int thread_offset_h = threadIdx.x / Threads::kW * kStrideH * Iterations::kH;
149  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
150 
151  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
152  }
153  };
154 };
155 
157 
158 template <typename TileTraits_, typename Index_ = int>
160  : public TileLoadIterator<TileTraits_,
161  typename TileTraits_::Scalar,
162  TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH
163  : IteratorAdvance::kW,
164  MemorySpace::kGlobal,
165  Index_> {
168 
169  typedef TileLoadIterator<TileTraits_,
170  typename TileTraits_::Scalar,
171  TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH
174  Index_>
177  static MatrixLayout::Kind const kLayout = TileTraits_::kLayout;
179  typedef typename Base::Fragment Fragment;
181  typedef typename TileTraits_::Scalar Scalar;
183  typedef typename TileTraits_::Threads Threads;
185  typedef Index_ Index;
187  typedef typename TileTraits_::ThreadOffset ThreadOffset;
190 
192 
194  typedef typename Base::Params BaseParams;
195 
196  struct Params : public BaseParams {
199  Index inc_d = 0;
200  Index inc_advance = 0;
201  // Move by some columns for each iteration in the H dimension.
202  Index inc_h = Base::Delta::kH * stride_h;
203 
204  // Move by some more columns in the number of iterations if the D dimension is > 1.
205  if (Base::Delta::kD > 0) {
206  inc_d = Base::Delta::kD * stride_h - (Base::Iterations::kH - 1) * inc_h;
207  }
208 
209  // Move to the beginning of the next iteration.
210  if (kAdvance == IteratorAdvance::kH && Base::Delta::kD > 0) {
211  inc_advance = inc_d;
212  } else if (kAdvance == IteratorAdvance::kH) {
213  inc_advance = inc_h;
214  } else if (Base::Delta::kD > 0) {
215  inc_advance = (Base::Iterations::kW + 0) * ShapeCount<typename Base::Delta>::kWc -
216  (Base::Iterations::kH - 1) * inc_h -
217  (Base::Iterations::kD - 1) * Base::Delta::kD * stride_h;
218  } else {
219  inc_advance = (Base::Iterations::kW + 0) * ShapeCount<typename Base::Delta>::kWc -
220  (Base::Iterations::kH - 1) * inc_h;
221  }
222 
224  return 0;
225  }
226  };
227 
232 
233  CUTLASS_DEVICE void initialize_predicates(const Coord<3>& bounds, const Coord<3>& block) {
234  // Setup the masks to control loads.
235  predicates.fill(0);
236 
237  int bounds_h, bounds_w;
238  if (kAdvance == IteratorAdvance::kH) {
239  bounds_w = bounds[2] - block[2];
240  bounds_h = bounds[1];
241 
242  } else {
243  bounds_w = bounds[1];
244  bounds_h = bounds[2] - block[1];
245  }
246 
247  // Fill in the bits of the predicate vector.
248  for (int d = 0; d < Base::Iterations::kD; ++d) {
249  for (int h = 0; h < Base::Iterations::kH; ++h) {
250  for (int w = 0; w < Base::Iterations::kW; ++w) {
251  for (int c = 0; c < Base::Iterations::kC; ++c) {
252  bool flag = w * Base::Delta::kW < bounds_w;
253  if (kAdvance == IteratorAdvance::kH) {
254  flag = flag && (h * Base::Delta::kH + d * Base::Delta::kD) < bounds_h;
255  } else {
256  flag = flag && (h * Base::Delta::kH) < bounds_h;
257  }
258  int const bit = ComputeOffsetFromShape<typename Base::Iterations>::get(d, h, w, c);
259  predicates.set(bit, flag);
260  }
261  }
262  }
263  }
264  }
265 
267  CUTLASS_DEVICE GemmGlobalIteratorAb(Params const& _params,
268  const Coord<3>& bounds,
269  const Coord<3>& block,
270  ThreadOffset thread_offset_func = ThreadOffset())
271  : params(_params) {
272  thread_offset = thread_offset_func();
273  // The column.
274  Index block_h = thread_offset[1];
275  // The contiguous dimension.
276  Index block_w = thread_offset[2];
277 
278  // Add the blocks indices.
279  if (kAdvance == IteratorAdvance::kH) {
280  block_h += block[1];
281  block_w += block[2];
282 
283  } else {
284  block_h += block[2];
285  block_w += block[1];
286  }
287 
288  // Setup the pointer.
289  params.pointer += (block_h * params.stride_h + block_w);
290 
291  // Initialize predicates
292  initialize_predicates(bounds, make_Coord(0, block_h, block_w));
293  }
294 
296  CUTLASS_DEVICE void inc_h() { params.pointer += params.inc_h; }
298  CUTLASS_DEVICE void inc_d() { params.pointer += params.inc_d; }
300  CUTLASS_DEVICE void inc_advance() { params.pointer += params.inc_advance; }
301 
304  Scalar const* data() const { return params.pointer; }
305 
307  CUTLASS_DEVICE void residue(Index k) {
308  // The coordinates of the thread.
309  Index block_h = thread_offset[1];
310  // The contiguous dimension.
311  Index block_w = thread_offset[2];
312 
313  // Update the predicate vector.
314  for (int d = 0; d < Base::Iterations::kD; ++d) {
315  for (int h = 0; h < Base::Iterations::kH; ++h) {
316  for (int w = 0; w < Base::Iterations::kW; ++w) {
317  for (int c = 0; c < Base::Iterations::kC; ++c) {
318  Index offset = 0;
319  if (kAdvance == IteratorAdvance::kH) {
320  offset += block_h + h * Base::Delta::kH + d * Base::Delta::kD;
321  } else {
322  offset += block_w + w * Base::Delta::kW;
323  }
324 
325  int const bit = ComputeOffsetFromShape<typename Base::Iterations>::get(d, h, w, c);
326  if (offset >= k) {
327  predicates.set(bit, false);
328  }
329  }
330  }
331  }
332  }
333  }
334 
336  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const {
337  int const bit = ComputeOffsetFromShape<typename Base::Iterations>::get(d, h, w, c);
338  return predicates[bit];
339  }
340 
343 };
344 
346 
347 template <typename TileTraits_, typename Index_ = int>
348 struct GemmGlobalIteratorCd : public TileIteratorBase<TileTraits_,
349  typename TileTraits_::Scalar,
350  IteratorAdvance::kH,
351  MemorySpace::kGlobal,
352  Index_> {
356  typedef TileIteratorBase<TileTraits_,
357  typename TileTraits_::Scalar,
360  Index_>
362 
364  static MatrixLayout::Kind const kLayout = TileTraits_::kLayout;
365 
367  typedef typename TileTraits_::Scalar Scalar;
369  typedef typename TileTraits_::Pointer Pointer;
371  typedef typename TileTraits_::Threads Threads;
373  typedef Index_ Index;
375  typedef typename TileTraits_::ThreadOffset ThreadOffset;
376 
378  struct Params {
389 
392  Pointer pointer, Index ld, Index bound, Index epilogue_stride_w, Index epilogue_delta_w) {
393  // The pointer.
394  this->pointer = pointer;
395  // Each column of the matrix.
396  stride_h = TileTraits_::ThreadsDelta::kH * ld;
397  // Each thread outputs 1 column per iteration. The stride between columns is given by the
398  // number of scalars that are loaded per LDS for B.
399  inc_h = ld * TileTraits_::kStrideH;
400  inc_advance =
401  (ld - ld * TileTraits_::kStrideH * (Base::Iterations::kH - 1)) + epilogue_stride_w;
402 
403  predicate_offset = bound;
404  predicate_inc_h = TileTraits_::kStrideH;
406  -((TileTraits_::kStrideH * (Base::Iterations::kH - 1) - 1) + epilogue_delta_w);
407 
408  return 0;
409  }
410  };
411 
415 
417  CUTLASS_DEVICE GemmGlobalIteratorCd() {}
418 
420  CUTLASS_DEVICE GemmGlobalIteratorCd(Params const& params,
421  const Coord<3>& bounds,
422  const Coord<3>& block,
423  int offset = 0,
424  int pred_offset = 0,
425  ThreadOffset thread_offset_func = ThreadOffset())
426  : params(params) {
427  thread_offset = thread_offset_func();
428  // Each warp works on a different column of the tile.
429  int const h = thread_offset[1] + block[1];
430  // Each lane writes a different element.
431  int const w = thread_offset[2] + block[2];
432  // Setup the pointer.
433  this->params.pointer += ((h * params.stride_h + w) + offset);
434 
435  // Prepare the vector of predicates.
436  for (int i = 0; i < Base::Iterations::kW; ++i) {
437  predicates.set(i, w + i * Base::Delta::kW < bounds[2]);
438  }
439  this->params.predicate_offset -= (h + pred_offset);
440  }
441 
443  CUTLASS_DEVICE void inc_c() {}
445  CUTLASS_DEVICE void inc_w() {}
447  CUTLASS_DEVICE void inc_h() {
450  }
452  CUTLASS_DEVICE void inc_d() {}
454  CUTLASS_DEVICE void inc_advance() {
457  }
458 
460  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const {
461  return predicates.at(w) && params.predicate_offset > 0;
462  }
463 
466  Pointer data() { return params.pointer; }
467 
469  Pointer const data() const { return params.pointer; }
470 
473 };
474 
476 
477 } // namespace gemm
478 } // namespace cutlass
Definition: gemm_global_tile.h:116
+
Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_global_tile.h:92
+
Index inc_advance
The strides to increment the pointer.
Definition: gemm_global_tile.h:384
+
CUTLASS_DEVICE void inc_d()
Increment the pointer in the D dimension.
Definition: gemm_global_tile.h:452
+
Definition: convert.h:33
+
cutlass::PredicateVector< ShapeCount< typename Base::Iterations >::kCount > PredicateVector
Definition: gemm_global_tile.h:191
+
static MatrixLayout::Kind const kLayout
The layout.
Definition: gemm_global_tile.h:177
+
T type
Definition: platform.h:369
+
Base::Params BaseParams
Iterator parameters type.
Definition: gemm_global_tile.h:194
+
Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
The number of iterations needed to load/store the tile.
Definition: gemm_global_tile.h:97
+
Index_ Index
The index.
Definition: gemm_global_tile.h:373
+
Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
+
GemmGlobalIteratorCd< TileTraits_, Index_ > This_
This class.
Definition: gemm_global_tile.h:354
+
static MatrixLayout::Kind const kLayout
The layout.
Definition: gemm_global_tile.h:364
+
Definition: gemm_global_tile.h:70
+
Scalar_ * Pointer
The pointer.
Definition: gemm_global_tile.h:78
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
Kind
Definition: tile_iterator.h:62
+
CUTLASS_HOST_DEVICE bool at(int idx) const
Accesses a bit within the predicate vector.
Definition: predicate_vector.h:356
+
Definition: load_store.h:43
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
Definition: gemm_global_tile.h:99
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_global_tile.h:82
+
TileIteratorBase< TileTraits_, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > Base
The base class.
Definition: gemm_global_tile.h:361
+
Shape< 1, 1, Tile::kC > ThreadsDelta
The relative offset between two elements in the H/W dimension in adjacent threads.
Definition: gemm_global_tile.h:89
+
Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > Delta
Override the strides in each dimension between different loads/stores.
Definition: gemm_global_tile.h:134
+
Index predicate_inc_h
Definition: gemm_global_tile.h:386
+
static CUTLASS_DEVICE int get(int d, int h, int w, int c)
Definition: shape.h:166
+
CUTLASS_HOST_DEVICE Pointer const data() const
Definition: gemm_global_tile.h:469
+
CUTLASS_DEVICE void initialize_predicates(const Coord< 3 > &bounds, const Coord< 3 > &block)
Definition: gemm_global_tile.h:233
+
Definition: tile_iterator.h:62
+
static IteratorAdvance::Kind const kAdvance
Specifies in which dimension post-increment accesses advance.
Definition: tile_iterator.h:331
+
TileLoadIterator< TileTraits_, typename TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ > Base
The base class.
Definition: gemm_global_tile.h:175
+
CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
Is the iterator valid?
Definition: gemm_global_tile.h:336
+
Definition: gemm_global_tile.h:196
+
Definition: matrix_traits.h:43
+
C++ features that may be otherwise unimplemented for CUDA device functions.
+
Definition: gemm_global_tile.h:159
+
CUTLASS_DEVICE void inc_advance()
Increment the pointer to move to the next iteration.
Definition: gemm_global_tile.h:454
+
GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > Base
The base class.
Definition: gemm_global_tile.h:129
+
Kind
Definition: load_store.h:40
+
Index stride_h
Definition: tile_iterator.h:172
+
static IteratorAdvance::Kind const kAdvance
Specifies in which dimension post-increment accesses advance.
Definition: gemm_global_tile.h:189
+
TileTraits_::Threads Threads
The threads.
Definition: gemm_global_tile.h:183
+
CUTLASS_HOST_DEVICE int initialize(Scalar const *ptr, Index stride_h)
Initializes params to load a strip-mined tile, given pointer and stride_h.
Definition: gemm_global_tile.h:198
+
CUTLASS_HOST_DEVICE int initialize()
Definition: tile_iterator.h:425
+
static int const kStrideH
The stride in the H dimension.
Definition: gemm_global_tile.h:132
+
static int const kH
The height of the cube.
Definition: shape.h:68
+
Shape< Threads_::kD, Threads_::kH *Threads_::kW/Tile_::kW, Tile_::kW, 1 > Threads
Definition: gemm_global_tile.h:59
+
Index predicate_inc_advance
The strides to increment the predicate offset.
Definition: gemm_global_tile.h:386
+
static GemmOperand::Kind const kOperand
Identity of the operand.
Definition: gemm_global_tile.h:72
+
Index inc_h
Definition: tile_iterator.h:176
+
Defines container classes and iterators for managing a statically sized vector of boolean predicates...
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+ +
PredicateVector predicates
The predicates.
Definition: gemm_global_tile.h:342
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_global_tile.h:76
+
CUTLASS_HOST_DEVICE Scalar const * data() const
Returns the current pointer.
Definition: gemm_global_tile.h:304
+
Defines a type for restructuring a tile.
+
Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
+
Base::Fragment Fragment
Fragment type loaded by the iterator.
Definition: gemm_global_tile.h:179
+
TileTraits_::Threads Threads
The threads.
Definition: gemm_global_tile.h:371
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_global_tile.h:147
+
CUTLASS_DEVICE void inc_h()
Increment the pointer in the H dimension.
Definition: gemm_global_tile.h:447
+
CUTLASS_DEVICE GemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int offset=0, int pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())
Ctor.
Definition: gemm_global_tile.h:420
+
Definition: gemm_operand.h:67
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_global_tile.h:102
+
Index inc_advance
Definition: tile_iterator.h:179
+
CUTLASS_DEVICE void residue(Index k)
That&#39;s the residue! Update the predicates.
Definition: gemm_global_tile.h:307
+
CUTLASS_HOST_DEVICE void fill(bool value=true)
Fills all predicates with a given value.
Definition: predicate_vector.h:343
+
CUTLASS_DEVICE GemmGlobalIteratorAb(Params const &_params, const Coord< 3 > &bounds, const Coord< 3 > &block, ThreadOffset thread_offset_func=ThreadOffset())
Ctor.
Definition: gemm_global_tile.h:267
+
CUTLASS_HOST_DEVICE int initialize(Pointer pointer, Index ld, Index bound, Index epilogue_stride_w, Index epilogue_delta_w)
Setup the params.
Definition: gemm_global_tile.h:391
+
CUTLASS_DEVICE void inc_c()
Increment the pointer in the C dimension.
Definition: gemm_global_tile.h:443
+
CUTLASS_HOST_DEVICE Pointer data()
Returns the raw pointer.
Definition: gemm_global_tile.h:466
+
Scalar const * pointer
Pointer to memory.
Definition: tile_iterator.h:390
+
Base::Threads Threads
Definition: gemm_global_tile.h:138
+
Index stride_h
The stride in the H dimension to setup the thread in the block.
Definition: gemm_global_tile.h:382
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_global_tile.h:104
+
Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSize > ImmediateOffsetStrides
Strides for immediate offset computation.
Definition: gemm_global_tile.h:94
+
Statically sized array of bits implementing a predicate vector.
Definition: predicate_vector.h:104
+
CUTLASS_DEVICE void inc_h()
Increment the pointer in the H dimension.
Definition: gemm_global_tile.h:296
+
TileTraits_::ThreadOffset ThreadOffset
The thread offset.
Definition: gemm_global_tile.h:375
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Base::ImmediateOffsetStrides ImmediateOffsetStrides
Definition: gemm_global_tile.h:142
+
TileTraits_::Scalar Scalar
The scalar.
Definition: gemm_global_tile.h:367
+
Index inc_h
Definition: gemm_global_tile.h:384
+
cutlass::PredicateVector< Base::Iterations::kW > predicates
The predicates for the row.
Definition: gemm_global_tile.h:472
+
CUTLASS_DEVICE void inc_d()
Increment the pointer in the D dimension.
Definition: gemm_global_tile.h:298
+
Pointer pointer
The pointer.
Definition: gemm_global_tile.h:380
+
GemmGlobalIteratorAb< TileTraits_, Index_ > This_
This class.
Definition: gemm_global_tile.h:167
+ +
ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
The tile shape.
Definition: gemm_global_tile.h:85
+
Base::Fragment Fragment
Fragment definition.
Definition: tile_iterator.h:364
+
Iterator for accessing a stripmined tile in memory.
Definition: tile_iterator.h:102
+
CUTLASS_DEVICE void inc_w()
Increment the pointer in the W dimension.
Definition: gemm_global_tile.h:445
+
Params params
Definition: gemm_global_tile.h:412
+
Definition: gemm_global_tile.h:348
+
Definition: matrix_traits.h:36
+
Coord< 4 > thread_offset
Offset of an individual lane from the start of the tile.
Definition: gemm_global_tile.h:414
+
TileTraits_::ThreadOffset ThreadOffset
The thread offset.
Definition: gemm_global_tile.h:187
+
static int const kW
The width of the cube.
Definition: shape.h:70
+
CUTLASS_HOST_DEVICE void set(int idx, bool value=true)
Set a bit within the predicate vector.
Definition: predicate_vector.h:364
+
Parameters.
Definition: tile_iterator.h:388
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_global_tile.h:145
+
Kind
Definition: matrix_traits.h:36
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_global_tile.h:80
+
Tile_ Tile
Definition: reshape_tile.h:43
+
Definition: tile_iterator.h:62
+
Base::Iterations Iterations
Definition: gemm_global_tile.h:136
+
Index_ Index
The index.
Definition: gemm_global_tile.h:185
+
TileTraits_::Pointer Pointer
The pointer.
Definition: gemm_global_tile.h:369
+
Kind
Definition: matrix_traits.h:43
+
TileTraits_::Scalar Scalar
The scalar.
Definition: gemm_global_tile.h:181
+
Threads_ Threads
Definition: gemm_global_tile.h:54
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
CUTLASS_DEVICE void inc_advance()
Increment the pointer to move to the next iteration.
Definition: gemm_global_tile.h:300
+
CUTLASS_DEVICE GemmGlobalIteratorCd()
Ctor.
Definition: gemm_global_tile.h:417
+
Params params
The parameters.
Definition: gemm_global_tile.h:231
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
The params.
Definition: gemm_global_tile.h:378
+
Base::ThreadsDelta ThreadsDelta
Definition: gemm_global_tile.h:140
+
CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
Test the validity of the iterator.
Definition: gemm_global_tile.h:460
+
Coord< 4 > thread_offset
Offset of an individual lane from the start of the tile.
Definition: gemm_global_tile.h:229
+
Computes derived counts for a Layout Concept based class.
Definition: shape.h:79
+
Index predicate_offset
The column offset to compute the predicate for the columns.
Definition: gemm_global_tile.h:388
+
Index inc_d
Definition: tile_iterator.h:175
+
static MatrixLayout::Kind const kLayout
The layout.
Definition: gemm_global_tile.h:74
+
+ + + + diff --git a/docs/generated-html/gemm__operand_8h.html b/docs/generated-html/gemm__operand_8h.html new file mode 100644 index 00000000..17d7ebd3 --- /dev/null +++ b/docs/generated-html/gemm__operand_8h.html @@ -0,0 +1,134 @@ + + + + + + + +Cutlass: gemm_operand.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_operand.h File Reference
+
+
+ +

Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ >
 Helper to describe attributes of GEMM matrix operands. More...
 
struct  cutlass::gemm::GetExtent< kOperand_, Tile_ >
 
struct  cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >
 
struct  cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >
 
struct  cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >
 
struct  cutlass::gemm::ProjectOperand< operand, Kstrided >
 
struct  cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >
 Project A operand - (0, K, M) More...
 
struct  cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >
 Project B operand - (0, K, N) More...
 
struct  cutlass::gemm::ProjectOperand< GemmOperand::kC, true >
 Project C operand - (0, N, M) More...
 
struct  cutlass::gemm::ProjectOperand< GemmOperand::kD, true >
 Project D operand - (0, N, M) More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__operand_8h_source.html b/docs/generated-html/gemm__operand_8h_source.html new file mode 100644 index 00000000..83b58f2b --- /dev/null +++ b/docs/generated-html/gemm__operand_8h_source.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: gemm_operand.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_operand.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/matrix_traits.h>
32 #include <cutlass/reshape_tile.h>
33 #include <cutlass/util/platform.h>
34 
35 namespace cutlass {
36 namespace gemm {
37 
39 
41 template <GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_>
43  static const bool Congruous =
44  (kOperand_ == GemmOperand::kA ^ kLayout_ == MatrixLayout::kRowMajor);
45 };
46 
48 
49 template <typename GemmOperand::Kind kOperand_, typename Tile_>
50 struct GetExtent;
51 
52 template <typename Tile_>
53 struct GetExtent<GemmOperand::kA, Tile_> {
54  static const int kExtent = Tile_::kW;
55 };
56 
57 template <typename Tile_>
58 struct GetExtent<GemmOperand::kB, Tile_> {
59  static const int kExtent = Tile_::kH;
60 };
61 
63 
66 template <typename ThreadBlockTile_, GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
68  // Only defined for A or B
69  static_assert(Usage == GemmOperand::kA || Usage == GemmOperand::kB,
70  "MultiplicandTileShape defined only for A or B operands.");
71 
73  typedef ThreadBlockTile_ ThreadBlockTile;
74 
76  static GemmOperand::Kind const kUsage = Usage;
77 
79  static MatrixLayout::Kind const kLayout = Layout;
80 
81  // True if K is the strided dimension
83 
85  typedef typename platform::conditional<
86  kKstrided,
89 };
90 
92 
95 template <GemmOperand::Kind operand, bool Kstrided = true>
97 
99 template <bool Kstrided>
100 struct ProjectOperand<GemmOperand::kA, Kstrided> {
102  static Coord<3> project(Coord<3> const &coord) {
103  if (Kstrided) {
104  return make_Coord(0, coord[0], coord[2]);
105  } else {
106  return make_Coord(0, coord[2], coord[0]);
107  }
108  }
109 };
110 
112 template <bool Kstrided>
113 struct ProjectOperand<GemmOperand::kB, Kstrided> {
115  static Coord<3> project(Coord<3> const &coord) {
116  if (Kstrided) {
117  return make_Coord(0, coord[0], coord[1]);
118  } else {
119  return make_Coord(0, coord[1], coord[0]);
120  }
121  }
122 };
123 
125 template <>
126 struct ProjectOperand<GemmOperand::kC, true> {
128  static Coord<3> project(Coord<3> const &coord) { return make_Coord(0, coord[1], coord[2]); }
129 };
130 
132 template <>
133 struct ProjectOperand<GemmOperand::kD, true> {
135  static Coord<3> project(Coord<3> const &coord) { return make_Coord(0, coord[1], coord[2]); }
136 };
137 
139 
140 } // namespace gemm
141 } // namespace cutlass
static bool const kKstrided
Definition: gemm_operand.h:82
+
static CUTLASS_HOST_DEVICE Coord< 3 > project(Coord< 3 > const &coord)
Definition: gemm_operand.h:115
+
Definition: convert.h:33
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
static CUTLASS_HOST_DEVICE Coord< 3 > project(Coord< 3 > const &coord)
Definition: gemm_operand.h:128
+
C++ features that may be otherwise unimplemented for CUDA device functions.
+
ThreadBlockTile_ ThreadBlockTile
Shape of GEMM thread block tile (K, N, M)
Definition: gemm_operand.h:70
+
platform::conditional< kKstrided, Shape< 1, ThreadBlockTile::kD, GetExtent< Usage, ThreadBlockTile >::kExtent >, Shape< 1, GetExtent< Usage, ThreadBlockTile >::kExtent, ThreadBlockTile::kD > >::type Shape
Map the ThreadBlockShape onto (kH, kW) dimensions for A and B operand.
Definition: gemm_operand.h:88
+
Definition: matrix_traits.h:36
+
Defines a type for restructuring a tile.
+
Definition: gemm_operand.h:67
+
static const bool Congruous
Definition: gemm_operand.h:43
+
Definition: matrix_traits.h:43
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
std::conditional (true specialization)
Definition: platform.h:343
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
static MatrixLayout::Kind const kLayout
Layout of tile.
Definition: gemm_operand.h:79
+
static CUTLASS_HOST_DEVICE Coord< 3 > project(Coord< 3 > const &coord)
Definition: gemm_operand.h:102
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Definition: gemm_operand.h:96
+
Definition: gemm_operand.h:50
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
Gemm operand - D = A * B + C.
Definition: matrix_traits.h:42
+
Kind
Definition: matrix_traits.h:36
+
static CUTLASS_HOST_DEVICE Coord< 3 > project(Coord< 3 > const &coord)
Definition: gemm_operand.h:135
+
Kind
Definition: matrix_traits.h:43
+
Definition: matrix_traits.h:43
+
static GemmOperand::Kind const kUsage
Identifies multiplicand.
Definition: gemm_operand.h:76
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
Helper to describe attributes of GEMM matrix operands.
Definition: gemm_operand.h:42
+
+ + + + diff --git a/docs/generated-html/gemm__shared__stream_8h.html b/docs/generated-html/gemm__shared__stream_8h.html new file mode 100644 index 00000000..314df24b --- /dev/null +++ b/docs/generated-html/gemm__shared__stream_8h.html @@ -0,0 +1,112 @@ + + + + + + + +Cutlass: gemm_shared_stream.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_shared_stream.h File Reference
+
+
+ +

Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEMM pipeline. +More...

+ +

Go to the source code of this file.

+ + + + + + + +

+Classes

struct  cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
 
struct  cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params
 The params. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__shared__stream_8h_source.html b/docs/generated-html/gemm__shared__stream_8h_source.html new file mode 100644 index 00000000..8a0ec9da --- /dev/null +++ b/docs/generated-html/gemm__shared__stream_8h_source.html @@ -0,0 +1,112 @@ + + + + + + + +Cutlass: gemm_shared_stream.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_shared_stream.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
32 
33 namespace cutlass {
34 namespace gemm {
35 
37 
38 template <
40  typename Iterator_,
42  typename Transformer_ = Copy<typename Iterator_::Fragment> >
43 
46  typedef Iterator_ Iterator;
48  typedef Transformer_ Transformer;
49 
51  typedef typename Iterator::Fragment FetchedFragment;
53  typedef typename Transformer::OutputFragment TransformedFragment;
56  "");
59 
61  struct Params {
63  typename Iterator::Params iterator;
64 
66  CUTLASS_HOST_DEVICE int initialize() { return iterator.initialize(); }
67  };
68 
70  typedef typename Iterator::Storage SharedStorage;
71 
73  CUTLASS_DEVICE SharedLoadStream() {}
74 
76  CUTLASS_DEVICE SharedLoadStream(Params const &params, SharedStorage &shared_storage) {
77  this->initialize(params, shared_storage);
78  }
79 
81  CUTLASS_DEVICE void initialize(Params const &params, SharedStorage &shared_storage) {
82  // The iterator.
83  iterator = Iterator(params.iterator, shared_storage);
84  // The transformer.
86  }
87 
89  CUTLASS_DEVICE void copy(FetchedFragment &fetched) { shared_iterator_load(iterator, fetched); }
90 
92  CUTLASS_DEVICE void copy(int d, FetchedFragment &fetched) {
93  shared_iterator_load(iterator, fetched, d);
94  }
95 
97  CUTLASS_DEVICE void commit(FetchedFragment &fetched, TransformedFragment &transformed) {
98  transformer.transform(fetched, transformed);
99  }
100 
102  CUTLASS_DEVICE void inc_stage() { iterator.inc_stage(); }
103 
108 };
109 
111 
112 } // namespace gemm
113 } // namespace cutlass
CUTLASS_DEVICE void copy(FetchedFragment &fetched)
Load the data from shared memory to the fetch fragment.
Definition: gemm_shared_stream.h:89
+
Definition: convert.h:33
+
CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from a shared memory input iterator.
Definition: iterator_access.h:75
+
CUTLASS_DEVICE void initialize(Params const &params, SharedStorage &shared_storage)
Initialize the stream.
Definition: gemm_shared_stream.h:81
+
std::is_same (false specialization)
Definition: platform.h:412
+
Iterator::Storage SharedStorage
The storage in shared memory needed by that stream.
Definition: gemm_shared_stream.h:70
+
CUTLASS_DEVICE void commit(FetchedFragment &fetched, TransformedFragment &transformed)
Commit the data.
Definition: gemm_shared_stream.h:97
+
CUTLASS_DEVICE void inc_stage()
Increment the stage.
Definition: gemm_shared_stream.h:102
+
CUTLASS_DEVICE SharedLoadStream()
Ctor.
Definition: gemm_shared_stream.h:73
+
Defines iterators for efficiently loading and storing tiles to and from shared memory.
+
Definition: gemm_shared_stream.h:44
+
Transformer transformer
The transformer.
Definition: gemm_shared_stream.h:107
+
TransformedFragment Fragment
Make sure the fragments match.
Definition: gemm_shared_stream.h:56
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
CUTLASS_DEVICE void copy(int d, FetchedFragment &fetched)
Load the data from shared memory to the fetch fragment.
Definition: gemm_shared_stream.h:92
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Iterator::Params iterator
The iterator params.
Definition: gemm_shared_stream.h:63
+
Iterator iterator
The iterator.
Definition: gemm_shared_stream.h:105
+
CUTLASS_HOST_DEVICE int initialize()
Setup the params.
Definition: gemm_shared_stream.h:66
+
Transformer::OutputFragment TransformedFragment
The fragment that is obtained after the transformation by the transformer.
Definition: gemm_shared_stream.h:53
+
The params.
Definition: gemm_shared_stream.h:61
+
Iterator::Fragment FetchedFragment
The fragment that is copied from shared memory.
Definition: gemm_shared_stream.h:51
+
Transformer_ Transformer
The transformer.
Definition: gemm_shared_stream.h:48
+
Iterator_ Iterator
The load iterator.
Definition: gemm_shared_stream.h:46
+
CUTLASS_DEVICE SharedLoadStream(Params const &params, SharedStorage &shared_storage)
Ctor.
Definition: gemm_shared_stream.h:76
+
+ + + + diff --git a/docs/generated-html/gemm__shared__tile_8h.html b/docs/generated-html/gemm__shared__tile_8h.html new file mode 100644 index 00000000..cf63242a --- /dev/null +++ b/docs/generated-html/gemm__shared__tile_8h.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: gemm_shared_tile.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_shared_tile.h File Reference
+
+
+ +

Defines iterators for efficiently loading and storing tiles to and from shared memory. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >
 
struct  cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset
 
struct  cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
 
struct  cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset
 
struct  cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
 
struct  cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
struct  cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
 
struct  cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
struct  cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
 
struct  cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
struct  cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
 
struct  cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__shared__tile_8h_source.html b/docs/generated-html/gemm__shared__tile_8h_source.html new file mode 100644 index 00000000..7fe9d1ff --- /dev/null +++ b/docs/generated-html/gemm__shared__tile_8h_source.html @@ -0,0 +1,214 @@ + + + + + + + +Cutlass: gemm_shared_tile.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_shared_tile.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template <typename Scalar_, typename Tile_, typename Threads_, int kScalarsPerSts_>
42  typedef Scalar_* Pointer;
46  typedef Threads_ Threads;
48  typedef Shape<0, ShapeCount<Tile>::kWc, Tile::kC, kScalarsPerSts_> ThreadsStrides;
50  static int const kSkew = 0;
52  static int const kAccessSize = kScalarsPerSts_;
55 
57  typedef Shape<1,
58  Tile::kH / Threads::kH,
59  Tile::kW / Threads::kW,
60  Tile::kC / Threads::kC / kAccessSize>
67 
68  struct ThreadOffset {
70  Coord<4> operator()() const {
72  return make_Coord(0, 0, offset, 0);
73  }
74  };
75 };
76 
78 
79 template <typename Scalar_, typename Tile_, typename Threads_, int kScalarsPerSts_, int kSkew_>
84  typedef Scalar_* Pointer;
89  kScalarsPerSts_>::Tile Tile;
91  typedef Threads_ Threads;
93  static int const kSkew = kSkew_;
95  static int const kAccessSize = kScalarsPerSts_;
98 
100  typedef Shape<1, TileWithoutSkew::kH / Threads::kW, TileWithoutSkew::kW / Threads::kH> Iterations;
105 
106  struct ThreadOffset {
110  return make_Coord(0, 0, offset, 0);
111  }
112  };
113 
114  protected:
117 };
118 
120 
121 template <typename Scalar_,
122  typename OutputTile_,
123  typename Warps_,
124  typename ThreadsPerWarp_,
125  typename InstructionShape_,
126  int kStages_,
127  int kScalarsPerLds_,
128  int kSkew_ = 0>
134  typedef Scalar_* Pointer;
136  typedef Shape<kStages_,
137  OutputTile_::kD / InstructionShape_::kD,
138  GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD>
147  typedef Warps_ Warps;
149  typedef ThreadsPerWarp_ ThreadsPerWarp;
151  // static int const kScalarsPerLds = kScalarsPerLds_;
152  static int const kAccessSize = kScalarsPerLds_;
154  static int const kSkew = kSkew_;
157 
162 
164  typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp /* / kScalarsPerLds*/>
171 
173  struct ThreadOffset {
176  // Extract the warp.
177  int const warp = threadIdx.x / kWarpSize % Warps::kW;
178  // Compute the row offset for each thread
179  int const lane = (threadIdx.x & 0x0e) / 2;
180  // The offset.
181  int const offset = (warp * ThreadsPerWarp::kW + lane) * kAccessSize;
182 
183  return make_Coord(0, 0, offset, 0);
184  }
185  };
186 };
187 
189 
190 template <typename Scalar_,
191  typename OutputTile_,
192  typename Warps_,
193  typename ThreadsPerWarp_,
194  typename InstructionShape_,
195  int kStages_,
196  int kScalarsPerLds_,
197  int kSkew_ = 0>
203  typedef Scalar_* Pointer;
205  typedef Shape<kStages_,
206  OutputTile_::kD / InstructionShape_::kD,
207  GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD>
216  typedef Warps_ Warps;
218  typedef ThreadsPerWarp_ ThreadsPerWarp;
220  static int const kAccessSize = kScalarsPerLds_;
222  static int const kSkew = kSkew_;
225 
230 
232  typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp /* / kAccessSize*/> Iterations;
238 
240  struct ThreadOffset {
243  // The position of the warp.
244  int const warp = threadIdx.x / (Warps::kW * kWarpSize);
245 
246  // Compute the column offset for each thread
247  int const lane = (threadIdx.x & 0x10) / 8 + (threadIdx.x & 0x01);
248  // The offset.
249  int const offset = (warp * ThreadsPerWarp::kH + lane) * kAccessSize;
250 
251  return make_Coord(0, 0, offset, 0);
252  }
253  };
254 };
255 
257 
258 template <typename Scalar_,
259  typename OutputTile_,
260  typename Warps_,
261  typename ThreadsPerWarp_,
262  int kScalarsPerSts_,
263  int kSkew_ = 0>
268  typedef Scalar_* Pointer;
270  typedef OutputTile_ OutputTile;
272  typedef Warps_ Warps;
274  typedef ThreadsPerWarp_ ThreadsPerWarp;
276  static int const kAccessSize = kScalarsPerSts_;
278  static int const kSkew = kSkew_;
281 
283  static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW;
285  static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize;
287  static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew;
288 
297 
299  struct ThreadOffset {
302  // We issue STS.128 in the epilogue to store the accumulators to shared memory. When we use
303  // STS.128, we have to guarantee that threads in groups of 8 do not have bank conflicts (i.e
304  // they write to different banks).
305 
306  // Odd threads go to the second half of shared memory.
307  int const row = threadIdx.x & 0x01;
308 
309  int const warp_id = (threadIdx.x >> 5);
310 
311  int const warp_row = (warp_id % Warps::kW);
312  int const warp_col = (warp_id / Warps::kW);
313 
314  int hi_halfwarp_offset = OutputTile::kW * ((threadIdx.x >> 4) & 1);
315  int lo_halfwarp_offset = (((threadIdx.x >> 1) & 0x7) + warp_row * ThreadsPerWarp::kW);
316 
317  int col = kAccessSize * lo_halfwarp_offset +
318  warp_col * (ThreadsPerWarp::kH / 2) * OutputTile::kW + hi_halfwarp_offset;
319 
320  int offset = row * kScalarsPerRow + col;
321  return make_Coord(0, 0, offset, 0);
322  }
323  };
324 };
325 
327 
328 template <typename Scalar_,
329  typename OutputTile_,
330  typename Warps_,
331  typename ThreadsPerWarp_,
332  int kTileH_,
333  int kScalarsPerLds_,
334  int kSkew_ = 0>
339  typedef Scalar_* Pointer;
341  typedef OutputTile_ OutputTile;
343  typedef Warps_ Warps;
345  typedef ThreadsPerWarp_ ThreadsPerWarp;
347  static int const kAccessSize = kScalarsPerLds_;
349  static int const kSkew = kSkew_;
352 
354  static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW;
356  static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize;
358  static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew;
359 
362 
363  // Compute the number of iterations per warp in the Tile::kH dimension.
364  static int const kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount;
365 
366  // As shown above, the shared memory tile is composed of 2 rows and each row is made of
367  // kScalarsPerRow. A warp is expected to read from the 1st row, then move to the 2nd row and go
368  // back to the 1st row. To model that scheme we define the Iterations shape as Shape<X, 2, ...>.
369  // However, in some cases, we have only 1 iteration per warp. In that case, we must define the
370  // shape as Shape<1, 1, ...>. The following code does that.
371  static int const kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2;
372  // As soon as we know kIterationsH, it is trivial to compute kIterationsD:
374 
376  typedef Shape<kIterationsD, kIterationsH, OutputTile::kW / kWarpSize / kAccessSize> Iterations;
381 
383  struct ThreadOffset {
386  // Each warp works on a different column.
387  int const h = threadIdx.x / kWarpSize;
388  // Compute the row.
389  int const w = (threadIdx.x & (kWarpSize - 1)) * kAccessSize;
390  int offset = 0;
391  if (Iterations::kH == 1) {
392  int const row = h & 0x1;
393  int const col = h / 2;
394  offset = row * ShapeCount<Tile>::kWc + col * OutputTile::kW * Iterations::kD + w;
395  } else {
396  offset = h * OutputTile::kW * Iterations::kD + w;
397  }
398  return make_Coord(0, 0, offset, 0);
399  }
400  };
401 };
402 
404 
405 } // namespace gemm
406 } // namespace cutlass
static int const kAccessSize
The number of scalars per STS.
Definition: gemm_shared_tile.h:95
+
static CUTLASS_DEVICE int get()
Definition: shape.h:253
+
ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
The tile.
Definition: gemm_shared_tile.h:214
+
ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
The tile.
Definition: gemm_shared_tile.h:145
+
ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
The tile without skew after reshaping.
Definition: gemm_shared_tile.h:212
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:351
+
static int const kScalarsPerThread
The number of scalars per thread.
Definition: gemm_shared_tile.h:354
+
Definition: load_store.h:42
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:242
+
Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarp > Iterations
The number of iterations needed to load/store the tile.
Definition: gemm_shared_tile.h:232
+
Definition: convert.h:33
+
static int const kWarps
The number of warps.
Definition: gemm_shared_tile.h:227
+
Definition: gemm_shared_tile.h:129
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:42
+ +
static int const kScalarsPerRow
The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts).
Definition: gemm_shared_tile.h:287
+
T type
Definition: platform.h:369
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:132
+ +
Shape< 1, 1, kScalarsPerThread/kAccessSize > Iterations
The number of iterations needed to store the tile.
Definition: gemm_shared_tile.h:292
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_shared_tile.h:347
+
ThreadsPerWarp_ ThreadsPerWarp
The threads in a warp.
Definition: gemm_shared_tile.h:149
+
Definition: reshape_tile.h:42
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
Shape< 0, ShapeCount< Tile >::kWc, Tile::kC, kScalarsPerSts_ > ThreadsStrides
The strides to compute the base position of the thread.
Definition: gemm_shared_tile.h:48
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_shared_tile.h:276
+
Shape< kIterationsD, kIterationsH, OutputTile::kW/kWarpSize/kAccessSize > Iterations
The number of iterations needed to store the tile.
Definition: gemm_shared_tile.h:376
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:349
+
Warps_ Warps
The number of warps.
Definition: gemm_shared_tile.h:216
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:222
+
Definition: gemm_shared_tile.h:38
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:201
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_shared_tile.h:383
+
Definition: gemm_shared_tile.h:198
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:156
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:40
+
static GemmOperand::Kind const kOperand
Definition: gemm_shared_tile.h:130
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:280
+
Kind
Definition: load_store.h:40
+
Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
The tile with skew.
Definition: gemm_shared_tile.h:210
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_shared_tile.h:152
+
static int const kH
The height of the cube.
Definition: shape.h:68
+
Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/Threads::kC/kAccessSize > Iterations
The number of iterations needed to load/store the tile.
Definition: gemm_shared_tile.h:61
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:93
+
Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarp > Iterations
The number of iterations needed to load/store the tile.
Definition: gemm_shared_tile.h:165
+
OutputTile_ OutputTile
The dimension of the output tile.
Definition: gemm_shared_tile.h:270
+
static int const kScalarsPerRow
The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts).
Definition: gemm_shared_tile.h:358
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:203
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:134
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:268
+
static int const kScalarsPerThread
The number of scalars per thread.
Definition: gemm_shared_tile.h:283
+
Shape< OutputTile::kW, kScalarsPerRow, kWarpSize *kAccessSize > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:380
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:301
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:54
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:50
+
static int const kThreadsPerWarp
The number of threads in one dimension of the warp.
Definition: gemm_shared_tile.h:229
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_shared_tile.h:240
+
Shape< 0, ShapeCount< Tile >::kWc, Threads::kH *kAccessSize > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:104
+
Shape< 1, 2, kScalarsPerRow/kAccessSize, kAccessSize > Tile
The tile.
Definition: gemm_shared_tile.h:290
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_shared_tile.h:52
+
ReshapeTile< Tile_, kScalarsPerSts_ >::Tile Tile
The tile.
Definition: gemm_shared_tile.h:44
+ +
static int const kIterationsInHPerWarp
Definition: gemm_shared_tile.h:364
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:278
+
ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
The tile without skew after reshaping.
Definition: gemm_shared_tile.h:143
+
Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
+
Shape< 0, Threads::kH *ShapeCount< Tile >::kWc, Threads::kW *kAccessSize > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:66
+
Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:170
+
ReshapeTile< Shape< Tile_::kD, Tile_::kH, Tile_::kW+kSkew_ >, kScalarsPerSts_ >::Tile Tile
The tile.
Definition: gemm_shared_tile.h:89
+
Shape< 0, kScalarsPerSts_, ShapeCount< Tile >::kHwc/Threads::kW > ThreadsStrides
The strides to compute the base position of the thread.
Definition: gemm_shared_tile.h:116
+
ReshapeTile< Tile_, kScalarsPerSts_ >::Tile TileWithoutSkew
The tile without skews.
Definition: gemm_shared_tile.h:86
+
static int const kIterationsD
Definition: gemm_shared_tile.h:373
+
static int const kWarps
The number of warps.
Definition: gemm_shared_tile.h:159
+
Definition: matrix_traits.h:43
+
ThreadsPerWarp_ ThreadsPerWarp
The threads in the warps.
Definition: gemm_shared_tile.h:274
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_shared_tile.h:173
+
Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
The tile without skew.
Definition: gemm_shared_tile.h:139
+
Definition: gemm_shared_tile.h:335
+
Threads_ Threads
The threads.
Definition: gemm_shared_tile.h:91
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
OutputTile_ OutputTile
The dimension of the output tile.
Definition: gemm_shared_tile.h:341
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:82
+
Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:167
+
Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kAccessSize > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:296
+
static GemmOperand::Kind const kOperand
Definition: gemm_shared_tile.h:199
+
Shape< 1, 2, kScalarsPerRow/kAccessSize, kAccessSize > Tile
The tile.
Definition: gemm_shared_tile.h:361
+
static int const kThreadsPerWarp
The number of threads in one dimension of the warp.
Definition: gemm_shared_tile.h:161
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:84
+
Shape< OutputTile::kW, kScalarsPerRow, kWarpSize *kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:378
+
Shape< 1, TileWithoutSkew::kH/Threads::kW, TileWithoutSkew::kW/Threads::kH > Iterations
The number of iterations needed to load/store the tile.
Definition: gemm_shared_tile.h:100
+
Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
The tile without skew.
Definition: gemm_shared_tile.h:208
+
Threads_ Threads
The threads.
Definition: gemm_shared_tile.h:46
+
Definition: gemm_operand.h:50
+ +
Shape< 0, Threads::kH *ShapeCount< Tile >::kWc, Threads::kW *kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:63
+
static int const kThreads
The number of threads.
Definition: gemm_shared_tile.h:356
+
Warps_ Warps
The number of warps.
Definition: gemm_shared_tile.h:147
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:97
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:224
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:70
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:175
+
static int const kD
The depth of the cube.
Definition: shape.h:66
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_shared_tile.h:299
+
Warps_ Warps
The warps in the tile.
Definition: gemm_shared_tile.h:343
+
Tile_ Tile
Definition: reshape_tile.h:43
+
Shape< 0, ShapeCount< Tile >::kWc, Threads::kH *kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:102
+
static int const kIterationsH
Definition: gemm_shared_tile.h:371
+
Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:294
+
Kind
Definition: matrix_traits.h:43
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:154
+
ThreadsPerWarp_ ThreadsPerWarp
The threads in the warps.
Definition: gemm_shared_tile.h:345
+
Definition: matrix_traits.h:43
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:339
+
static int const kThreads
The number of threads.
Definition: gemm_shared_tile.h:285
+
Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:234
+
ThreadsPerWarp_ ThreadsPerWarp
The threads in a warp.
Definition: gemm_shared_tile.h:218
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:266
+
Computes the derived counts of a Layout Concept based class.
Definition: shape.h:79
+
Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
The tile with skew.
Definition: gemm_shared_tile.h:141
+
Warps_ Warps
The warps in the tile.
Definition: gemm_shared_tile.h:272
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:108
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:337
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:385
+
Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:237
+
Definition: gemm_shared_tile.h:264
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_shared_tile.h:220
+
+ + + + diff --git a/docs/generated-html/gemm__traits_8h.html b/docs/generated-html/gemm__traits_8h.html new file mode 100644 index 00000000..d782b378 --- /dev/null +++ b/docs/generated-html/gemm__traits_8h.html @@ -0,0 +1,151 @@ + + + + + + + +Cutlass: gemm_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_traits.h File Reference
+
+
+ +

Defines structural properties of complete GEMM computation. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
 
struct  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
 
struct  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
 The params. More...
 
union  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >
 
struct  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage
 
union  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage
 The storage in shared memory. More...
 
struct  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream
 Assemble the global load streams for A/B. More...
 
struct  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
 Assemble the shared load stream for A/B. More...
 
struct  cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
 
struct  cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__traits_8h_source.html b/docs/generated-html/gemm__traits_8h_source.html new file mode 100644 index 00000000..7ca9f4b8 --- /dev/null +++ b/docs/generated-html/gemm__traits_8h_source.html @@ -0,0 +1,252 @@ + + + + + + + +Cutlass: gemm_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/convert.h>
36 #include <cutlass/matrix_traits.h>
37 #include <cutlass/reshape_tile.h>
38 #include <cutlass/tile_iterator.h>
39 
40 namespace cutlass {
41 namespace gemm {
42 
44 
45 template <
47  typename ScalarA_,
49  typename ScalarB_,
51  typename ScalarC_,
53  typename ScalarD_,
55  typename OutputTile_,
57  typename MultiplyAdd_,
59  int kScalarsPerLdgA_,
61  int kScalarsPerStsA_,
63  int kScalarsPerLdsA_,
65  int kScalarsPerLdgB_,
67  int kScalarsPerStsB_,
69  int kScalarsPerLdsB_,
71  int kScalarsPerLdgCAndStgD_,
73  int kScalarsPerStsD_,
75  int kScalarsPerLdsD_,
77  int kStages_>
78 
79 struct GemmConfig {
80  //
82  typedef ScalarA_ ScalarA;
84  typedef ScalarB_ ScalarB;
86  typedef ScalarC_ ScalarC;
88  typedef ScalarD_ ScalarD;
89 
91  typedef OutputTile_ OutputTile;
93  typedef MultiplyAdd_ MultiplyAdd;
100 
104  static int const kWarpSize = cutlass::kWarpSize;
107 
109  static int const kScalarsPerLdgA = kScalarsPerLdgA_;
110  static int const kScalarsPerStsA = kScalarsPerStsA_;
111  static int const kScalarsPerLdsA = kScalarsPerLdsA_;
112 
114  static int const kScalarsPerLdgB = kScalarsPerLdgB_;
115  static int const kScalarsPerStsB = kScalarsPerStsB_;
116  static int const kScalarsPerLdsB = kScalarsPerLdsB_;
117 
119  static int const kScalarsPerLdgC = kScalarsPerLdgCAndStgD_;
120 
122  static int const kScalarsPerStgD = kScalarsPerLdgCAndStgD_;
123  static int const kScalarsPerStsD = kScalarsPerStsD_;
124  static int const kScalarsPerLdsD = kScalarsPerLdsD_;
125 
127  static int const kAccumulatorsPerLdsA = kScalarsPerLdsA / InstructionShape::kD;
128  static int const kAccumulatorsPerLdsB = kScalarsPerLdsB / InstructionShape::kD;
129 
131  static int const kStages = kStages_;
132 };
133 
135 
136 template <enum MatrixLayout::Kind, typename GemmConfig_>
138 
140 
141 template <typename GemmConfig_>
142 struct GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> {
145 
147  typedef typename GemmConfig_::ScalarA Scalar;
149  typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar;
150 
152  typedef GemmGlobalTileTraits<
153  // That's A.
155  // A is column-major.
157  // The pointer is float const.
158  Scalar const,
159  // The tile has size KxM in GEMM's terminology.
161  // The threads are distributed as warps x 32 (the traits may reorganize).
163  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
164  GemmConfig_::kScalarsPerLdgA>
166 
169  // The pointer is float.
171  // The tile has size KxM in GEMM's terminology.
172  Shape<GemmConfig_::kStages,
173  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
174  GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>,
175  // The threads are distributed as warps x 32 (the traits may reorganize).
176  typename GlobalTileTraits::Threads,
177  // The number of scalars per STS (STS.32 or STS.128, etc).
178  GemmConfig_::kScalarsPerStsA>
180 
183  // The pointer is float const.
184  MultiplyAddScalar const,
185  // The output tile size.
186  typename GemmConfig_::OutputTile,
187  // The number of warps.
188  typename GemmConfig_::Warps,
189  // The number of threads per warp.
190  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
191  // The shape of the FMA instruction.
192  typename GemmConfig_::InstructionShape,
193  // The number of stages.
194  GemmConfig_::kStages,
195  // The number of scalars per LDS.
196  GemmConfig_::kScalarsPerLdsA,
197  // The skew.
198  0>
200 };
201 
203 
204 template <typename GemmConfig_>
205 struct GemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_> {
208 
210  typedef typename GemmConfig_::ScalarA Scalar;
212  typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar;
213 
215  typedef GemmGlobalTileTraits<
216  // That's A.
218  // A is row-major.
220  // The pointer is float const.
221  Scalar const,
222  // The tile has size MxK in GEMM's terminology.
224  // The threads are distributed as (threads / K) x K (the traits may reorganize).
225  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
226  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
227  GemmConfig_::kScalarsPerLdgA>
229 
231  static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar);
234  // The pointer is float.
236  // The tile has size KxM in GEMM's terminology.
237  Shape<GemmConfig_::kStages,
238  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
239  GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>,
240  // The threads are distributed as (threads / K) x K (the traits may reorganize).
241  typename GlobalTileTraits::Threads,
242  // The number of scalars per STS.
243  GemmConfig_::kScalarsPerStsA,
244  // The skew to avoid bank conflicts added in the tile W dimension.
245  128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsA /
246  GlobalTileTraits::Threads::kW * kScalarsIn4B>
248 
251  // The pointer is float const.
252  MultiplyAddScalar const,
253  // The output tile size.
254  typename GemmConfig_::OutputTile,
255  // The number of warps.
256  typename GemmConfig_::Warps,
257  // The number of threads per warp.
258  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
259  // The shape of the FMA instruction.
260  typename GemmConfig_::InstructionShape,
261  // The number of stages.
262  GemmConfig_::kStages,
263  // The number of scalars per LDS.
264  GemmConfig_::kScalarsPerLdsA,
265  // The skew.
266  SharedStoreTileTraits::kSkew>
268 };
269 
271 
272 template <enum MatrixLayout::Kind, typename GemmConfig_>
274 
276 
277 template <typename GemmConfig_>
278 struct GemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_> {
281 
283  typedef typename GemmConfig_::ScalarB Scalar;
285  typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar;
286 
288  typedef GemmGlobalTileTraits<
289  // That's B.
291  // B is column-major.
293  // The pointer is float const.
294  Scalar const,
295  // The tile has size NxK in GEMM's terminology.
297  // The threads are distributed as (threads / K) x K (the traits may reorganize).
298  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
299  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
300  GemmConfig_::kScalarsPerLdgB>
302 
304  static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar);
307  // The pointer is float.
309  // The tile has size KxN in GEMM's terminology.
310  Shape<GemmConfig_::kStages,
311  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
312  GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>,
313  // The threads are distributed as (threads / K) x K (the traits may reorganize).
314  typename GlobalTileTraits::Threads,
315  // The number of scalars per STS.
316  GemmConfig_::kScalarsPerStsB,
317  // The skew to avoid bank conflicts added in the tile W dimension.
318  128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsB /
319  GlobalTileTraits::Threads::kW * kScalarsIn4B>
321 
324  // The pointer is float const.
325  MultiplyAddScalar const,
326  // The output tile size.
327  typename GemmConfig_::OutputTile,
328  // The number of warps.
329  typename GemmConfig_::Warps,
330  // The number of threads per warp.
331  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
332  // The shape of the FMA instruction.
333  typename GemmConfig_::InstructionShape,
334  // The number of stages.
335  GemmConfig_::kStages,
336  // The number of scalars per LDS.
337  GemmConfig_::kScalarsPerLdsB,
338  // The skew.
339  SharedStoreTileTraits::kSkew>
341 };
342 
344 
345 template <typename GemmConfig_>
346 struct GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> {
349 
351  typedef typename GemmConfig_::ScalarB Scalar;
353  typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar;
354 
356  typedef GemmGlobalTileTraits<
357  // That's B.
359  // B is row-major.
361  // The pointer is float const.
362  Scalar const,
363  // The tile has size KxN in GEMM's terminology.
365  // The threads are distributed as warps x 32 (the traits may reorganize).
367  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
368  GemmConfig_::kScalarsPerLdgB>
370 
373  // The pointer is float.
375  // The tile has size KxN in GEMM's terminology.
376  Shape<GemmConfig_::kStages,
377  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
378  GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>,
379  // The threads are distributed as warps x 32 (the traits may reorganize).
380  typename GlobalTileTraits::Threads,
381  // The number of scalars per STS (STS.32 or STS.128, etc).
382  GemmConfig_::kScalarsPerStsB>
384 
387  // The pointer is float const.
388  MultiplyAddScalar const,
389  // The output tile size.
390  typename GemmConfig_::OutputTile,
391  // The number of warps.
392  typename GemmConfig_::Warps,
393  // The number of threads per warp.
394  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
395  // The shape of the FMA instruction.
396  typename GemmConfig_::InstructionShape,
397  // The number of stages.
398  GemmConfig_::kStages,
399  // The number of scalars per LDS.
400  GemmConfig_::kScalarsPerLdsB,
401  // The skew.
402  0>
404 };
405 
407 
408 template <
410  typename GemmConfig_,
412  typename GlobalLoadStreamA_,
414  typename GlobalLoadStreamB_,
416  typename SharedLoadStreamA_,
418  typename SharedLoadStreamB_,
420  typename Epilogue_,
422  typename BlockSwizzle_ = IdentityBlockSwizzle,
424  typename Index_ = int,
427 
428 struct GemmTraits {
430  typedef GemmConfig_ GemmConfig;
433 
435  typedef GlobalLoadStreamA_ GlobalLoadStreamA;
437  static MatrixLayout::Kind const kLayoutA = GlobalLoadStreamA::kLayout;
439  typedef typename GlobalLoadStreamA_::Scalar ScalarA;
440 
442  typedef GlobalLoadStreamB_ GlobalLoadStreamB;
444  static MatrixLayout::Kind const kLayoutB = GlobalLoadStreamB::kLayout;
446  typedef typename GlobalLoadStreamB_::Scalar ScalarB;
447 
449  typedef SharedLoadStreamA_ SharedLoadStreamA;
451  typedef SharedLoadStreamB_ SharedLoadStreamB;
452 
454  typedef typename GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA;
455  // Also make sure we did not mess up the size of the storage.
456  static_assert(sizeof(SharedStoreStorageA) == sizeof(typename SharedLoadStreamA::SharedStorage),
457  "");
458 
460  typedef typename GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB;
461  // Also make sure we did not mess up the size of the storage.
462  static_assert(sizeof(SharedStoreStorageB) == sizeof(typename SharedLoadStreamB::SharedStorage),
463  "");
464 
466  typedef typename GemmConfig::MultiplyAdd MultiplyAdd;
468  typedef Epilogue_ Epilogue;
470  typedef typename Epilogue::ScalarC ScalarC;
471  typedef typename Epilogue::ScalarD ScalarD;
472 
474  typedef BlockSwizzle_ BlockSwizzle;
476  typedef Index_ Index;
478  typedef ClearAccumulators_ ClearAccumulators;
479 
481  struct Params {
483  Index m, n, k;
485  typename GlobalLoadStreamA::Params global_stream_a;
487  typename GlobalLoadStreamB::Params global_stream_b;
489  typename SharedLoadStreamA::Params shared_stream_a;
491  typename SharedLoadStreamB::Params shared_stream_b;
493  typename Epilogue::Params epilogue;
494 
496  template <typename GemmDesc_>
497  CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) {
498  // Set the problem size.
499  this->m = desc.m;
500  this->n = desc.n;
501  this->k = desc.k;
502 
503  // Initialize the iterator for A.
504  int error_code =
505  global_stream_a.initialize(reinterpret_cast<ScalarA const*>(desc.d_a), desc.lda);
506 
507  if (error_code) {
508  return error_code;
509  }
510 
511  // Initialize the iterator for B.
512  error_code = global_stream_b.initialize(reinterpret_cast<ScalarB const*>(desc.d_b), desc.ldb);
513 
514  if (error_code) {
515  return error_code;
516  }
517 
518  // The epilogue.
519  return epilogue.initialize(desc);
520  }
521  };
522 
523  // The storage for one operand's streams (used for both A and B).
524  template <typename GlobalLoadStream_, typename SharedLoadStream_>
526  // The storage needed by the global stream.
527  typename GlobalLoadStream_::SharedStorage global;
528  // The storage needed by the shared stream.
529  typename SharedLoadStream_::SharedStorage shared;
530  };
531 
532  // The storage for the main loop + prologue.
534  // The storage to shuffle the A matrix in shared memory.
536  // The storage to shuffle the B matrix in shared memory.
538  // The storage to clear the accumulators if needed.
540  };
541 
544  // The storage for the main loop.
546  // The storage for the epilogue.
547  typename Epilogue::SharedStorage epilogue;
548  };
549 
553  CUTLASS_DEVICE GlobalLoadStream(Params const& params,
554  SharedStorage& shared_storage,
555  dim3 const& block)
556  : stream_a(params.global_stream_a,
557  shared_storage.main_loop.stream_a.global,
558  cutlass::make_Coord(0, params.k, params.m),
559  cutlass::make_Coord(0, 0, block.x)),
560  stream_b(params.global_stream_b,
561  shared_storage.main_loop.stream_b.global,
562  cutlass::make_Coord(0, params.k, params.n),
563  make_Coord(0, 0, block.y)) {}
564 
566  CUTLASS_DEVICE void copy() {
567  stream_a.copy();
568  stream_b.copy();
569  }
570 
572  CUTLASS_DEVICE void commit() {
573  stream_a.commit();
574  stream_b.commit();
575  }
576 
578  CUTLASS_DEVICE void residue(Index k, bool skip_clear = false) {
579  stream_a.residue(k, skip_clear);
580  stream_b.residue(k, skip_clear);
581  }
582 
587  };
588 
592  CUTLASS_DEVICE SharedLoadStream(Params const& params, SharedStorage& shared_storage) {
593  stream_a.initialize(params.shared_stream_a, shared_storage.main_loop.stream_a.shared);
594  stream_b.initialize(params.shared_stream_b, shared_storage.main_loop.stream_b.shared);
595  }
596 
598  CUTLASS_DEVICE void copy(int step) {
599  stream_a.copy(step, fetched_a[step % 2]);
600  stream_b.copy(step, fetched_b[step % 2]);
601  }
602 
604  CUTLASS_DEVICE void commit(int step) {
605  stream_a.commit(fetched_a[step % 2], transformed_a[step % 2]);
606  stream_b.commit(fetched_b[step % 2], transformed_b[step % 2]);
607  }
608 
610  CUTLASS_DEVICE typename SharedLoadStreamA::Fragment const& fragment_a(int step) const {
611  return transformed_a[step % 2];
612  }
613 
615  CUTLASS_DEVICE typename SharedLoadStreamB::Fragment const& fragment_b(int step) const {
616  return transformed_b[step % 2];
617  }
618 
620  CUTLASS_DEVICE void inc_stage() {
621  stream_a.inc_stage();
622  stream_b.inc_stage();
623  }
624 
628  typename SharedLoadStreamA::FetchedFragment fetched_a[2];
630  typename SharedLoadStreamA::TransformedFragment transformed_a[2];
634  typename SharedLoadStreamB::FetchedFragment fetched_b[2];
636  typename SharedLoadStreamB::TransformedFragment transformed_b[2];
637  };
638 
640  static CUTLASS_DEVICE void shared_load_fence(bool in_loop) {
641  if (SharedLoadStreamA::Iterator::kRequiresLoadFence ||
642  SharedLoadStreamB::Iterator::kRequiresLoadFence) {
643  __syncthreads();
644  }
645  }
646 
648  static CUTLASS_DEVICE void shared_store_fence(bool in_loop) { __syncthreads(); }
649 };
650 
652 
653 template <typename GemmTileTraitsHelperA_, typename GemmTileTraitsHelperB_, typename Index_>
661  typedef TileStoreIterator<typename GemmTileTraitsHelperA_::SharedStoreTileTraits,
662  typename GemmTileTraitsHelperA_::SharedStoreTileTraits::Scalar,
669 
676  typedef TileStoreIterator<typename GemmTileTraitsHelperB_::SharedStoreTileTraits,
677  typename GemmTileTraitsHelperB_::SharedStoreTileTraits::Scalar,
684 
686  typedef TileLoadIterator<typename GemmTileTraitsHelperA_::SharedLoadTileTraits,
687  typename GemmTileTraitsHelperA_::Scalar,
694  typedef TileLoadIterator<typename GemmTileTraitsHelperB_::SharedLoadTileTraits,
695  typename GemmTileTraitsHelperB_::Scalar,
701 };
702 
704 
705 template <
707  MatrixLayout::Kind kLayoutA_,
709  MatrixLayout::Kind kLayoutB_,
711  typename GemmConfig_,
713  typename Epilogue_,
715  typename Index_ = int,
716  // The configuration for the A matrix.
717  typename GemmTileTraitsHelperA_ = GemmTileTraitsHelperA<kLayoutA_, GemmConfig_>,
718  // The configuration for the B matrix.
719  typename GemmTileTraitsHelperB_ = GemmTileTraitsHelperB<kLayoutB_, GemmConfig_>,
720  // The helper class to create the streams and iterators.
721  typename Helper_ =
724  // The config.
725  GemmConfig_,
726  // The stream to load A from global memory to shared memory.
727  typename Helper_::GlobalLoadStreamA,
728  // The stream to load B from global memory to shared memory.
729  typename Helper_::GlobalLoadStreamB,
730  // The stream to load A from shared memory.
731  typename Helper_::SharedLoadStreamA,
732  // The stream to load B from shared memory.
733  typename Helper_::SharedLoadStreamB,
734  // The epilogue.
735  Epilogue_,
736  // The block swizzle to reorganize the grid.
737  IdentityBlockSwizzle,
738  // The index.
739  Index_,
740  // The tool used to clear accumulators.
741  ClearAccumulators<typename GemmConfig_::Accumulators::Element> > {
742 };
743 
745 
746 } // namespace gemm
747 } // namespace cutlass
Index n
Definition: gemm_traits.h:483
+
static int const kWarpSize
The default warp size (32 threads per warp).
Definition: gemm_traits.h:104
+
Epilogue::SharedStorage epilogue
Definition: gemm_traits.h:547
+
static int const kScalarsPerStsA
Definition: gemm_traits.h:110
+
GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for B^N.
Definition: gemm_traits.h:340
+
ScalarA_ ScalarA
The scalar for A.
Definition: gemm_traits.h:82
+
GlobalLoadStreamA_ GlobalLoadStreamA
The stream to load A from global memory to shared memory.
Definition: gemm_traits.h:435
+
GlobalStoreIteratorD::Scalar ScalarD
The scalar for D.
Definition: gemm_epilogue.h:98
+
MultiplyAdd_ MultiplyAdd
The functor to do D = A*B + C.
Definition: gemm_traits.h:93
+
static int const kAccumulatorsPerLdsA
The number of accumulators that are going to be fed from one LDS A/B.
Definition: gemm_traits.h:127
+
Definition: load_store.h:42
+
static int const kScalarsPerLdsA
Definition: gemm_traits.h:111
+
SharedLoadStreamA_ SharedLoadStreamA
The iterator for A to load from shared memory.
Definition: gemm_traits.h:449
+
MultiplyAdd::InstructionShape InstructionShape
The shape of the instruction.
Definition: gemm_traits.h:95
+
Definition: convert.h:33
+
SharedLoadStreamA::Params shared_stream_a
The params for the A stream from shared memory.
Definition: gemm_traits.h:489
+
Definition: gemm_shared_tile.h:129
+
GlobalLoadStreamB_ GlobalLoadStreamB
The stream to load B from global memory to shared memory.
Definition: gemm_traits.h:442
+ +
CUTLASS_DEVICE void inc_stage()
Increment the stage.
Definition: gemm_traits.h:620
+
TileStoreIterator< typename GemmTileTraitsHelperA_::SharedStoreTileTraits, typename GemmTileTraitsHelperA_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
The iterator to store A to shared memory.
Definition: gemm_traits.h:665
+
static int const kScalarsPerLdsB
Definition: gemm_traits.h:116
+
Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
+
Epilogue::ScalarD ScalarD
Definition: gemm_traits.h:471
+
The storage in shared memory.
Definition: gemm_traits.h:543
+
SharedLoadStream< SharedLoadIteratorB > SharedLoadStreamB
The stream to load B from shared memory.
Definition: gemm_traits.h:700
+
Index k
Definition: gemm_traits.h:483
+ +
Definition: gemm_global_tile.h:70
+
SharedLoadStreamA::FetchedFragment fetched_a[2]
The fragments to fetch A.
Definition: gemm_traits.h:628
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
GemmConfig_::ScalarB Scalar
The input scalar.
Definition: gemm_traits.h:283
+
GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB > SharedStoreTileTraits
The traits class to build the iterator to store data to shared memory for B^T.
Definition: gemm_traits.h:383
+
SharedLoadStreamB_ SharedLoadStreamB
The iterator for B to load from shared memory.
Definition: gemm_traits.h:451
+
static int const kScalarsPerStgD
The number of scalars per STS/LDS/STG for D.
Definition: gemm_traits.h:122
+
CUTLASS_DEVICE void copy(int step)
Trigger the copies from shared memory to registers.
Definition: gemm_traits.h:598
+
GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
The traits class to build the iterator to load data from global memory for B^N.
Definition: gemm_traits.h:301
+
Definition: convert.h:69
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
SharedLoadStreamA stream_a
The stream for A.
Definition: gemm_traits.h:626
+
SharedLoadStream< SharedLoadIteratorA > SharedLoadStreamA
The stream to load A from shared memory.
Definition: gemm_traits.h:692
+
Definition: gemm_shared_tile.h:38
+
ScalarC_ ScalarC
The scalar for C.
Definition: gemm_traits.h:86
+
CUTLASS_DEVICE void copy()
Trigger the copies from shared memory to registers.
Definition: gemm_traits.h:566
+
GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, 0 > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for A^N.
Definition: gemm_traits.h:199
+
Epilogue_ Epilogue
The epilogue.
Definition: gemm_traits.h:468
+
GlobalLoadStreamA_::Scalar ScalarA
The scalar for A.
Definition: gemm_traits.h:439
+
Definition: tile_iterator.h:62
+
GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
The traits class to build the iterator to load data from global memory for A^N.
Definition: gemm_traits.h:165
+
ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
The number of warps.
Definition: gemm_traits.h:102
+
GemmConfig_::ScalarA Scalar
The input scalar.
Definition: gemm_traits.h:147
+
Definition: gemm_shared_tile.h:198
+
GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
The shared storage for B.
Definition: gemm_traits.h:457
+
Definition: gemm_global_tile.h:159
+
Epilogue::ScalarC ScalarC
The scalars in the epilogue.
Definition: gemm_traits.h:470
+
GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
The stream to load B from global memory to shared memory.
Definition: gemm_traits.h:683
+
SharedLoadStreamB stream_b
The stream for B.
Definition: gemm_traits.h:632
+
Assemble the shared load stream for A/B.
Definition: gemm_traits.h:590
+
GlobalLoadStreamB stream_b
The stream for B.
Definition: gemm_traits.h:586
+
GemmConfig::MultiplyAdd MultiplyAdd
The multiply-add functor.
Definition: gemm_traits.h:463
+
static CUTLASS_DEVICE void shared_load_fence(bool in_loop)
The memory fence for shared loads.
Definition: gemm_traits.h:640
+
GemmConfig_ GemmConfig
The configuration.
Definition: gemm_traits.h:430
+
Definition: gemm_global_stream.h:161
+
SharedLoadStreamB::TransformedFragment transformed_b[2]
The fragments to transform B.
Definition: gemm_traits.h:636
+
Definition: gemm_traits.h:273
+
GlobalLoadStreamA stream_a
The stream for A.
Definition: gemm_traits.h:584
+
GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for A^T.
Definition: gemm_traits.h:267
+
Definition: clear_accumulators.h:38
+
StreamSharedStorage< GlobalLoadStreamB, SharedLoadStreamB > stream_b
Definition: gemm_traits.h:537
+
The params.
Definition: gemm_traits.h:481
+
static int const kScalarsPerLdgA
The number of scalars per LDG/STS/LDS for A.
Definition: gemm_traits.h:109
+
CUTLASS_DEVICE SharedLoadStreamB::Fragment const & fragment_b(int step) const
The fragment B.
Definition: gemm_traits.h:615
+
Copy< typename GlobalLoadIteratorB::Fragment > GlobalTransformerB
The data converter for B before storing to shared memory.
Definition: gemm_traits.h:674
+
GemmConfig_::ScalarB Scalar
The input scalar.
Definition: gemm_traits.h:351
+
Describes layouts of matrices.
Definition: matrix_traits.h:35
+
GemmGlobalIteratorAb< typename GemmTileTraitsHelperB_::GlobalTileTraits, Index_ > GlobalLoadIteratorB
The global iterator to load B from global memory.
Definition: gemm_traits.h:672
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+ +
Definition: matrix_traits.h:36
+
CUTLASS_DEVICE void residue(Index k, bool skip_clear=false)
Execute the residue code.
Definition: gemm_traits.h:578
+
MultiplyAdd::Accumulators Accumulators
The accumulators.
Definition: gemm_traits.h:99
+
ClearAccumulators_ ClearAccumulators
Clear the accumulators.
Definition: gemm_traits.h:478
+
Definition: gemm_shared_stream.h:44
+
GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
The traits class to build the iterator to load data from global memory for A^T.
Definition: gemm_traits.h:228
+
Defines a type for restructuring a tile.
+
Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
+
Shape< A_::kD/B_::kD, A_::kH/B_::kH, A_::kW/B_::kW, A_::kC/B_::kC > Shape
Definition: shape.h:126
+
static int const kScalarsPerStsB
Definition: gemm_traits.h:115
+
Defines abstractions for efficiently clearing accumulator tiles.
+
Definition: gemm_traits.h:79
+
Assemble the global load streams for A/B.
Definition: gemm_traits.h:551
+
static int const kScalarsPerStsD
Definition: gemm_traits.h:123
+
static CUTLASS_DEVICE void shared_store_fence(bool in_loop)
The memory fence for shared stores.
Definition: gemm_traits.h:648
+
GemmConfig_::ScalarA Scalar
The input scalar.
Definition: gemm_traits.h:210
+
Definition: gemm_traits.h:137
+
CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc)
Initialize the parameters.
Definition: gemm_traits.h:497
+
GlobalLoadStream_::SharedStorage global
Definition: gemm_traits.h:527
+
Definition: matrix_traits.h:43
+
Definition: identity_block_swizzle.h:37
+
GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA > SharedStoreTileTraits
The traits class to build the iterator to store data to shared memory for A^N.
Definition: gemm_traits.h:179
+
ScalarB_ ScalarB
The scalar for B.
Definition: gemm_traits.h:84
+
GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
The scalar stored in shared memory.
Definition: gemm_traits.h:353
+
GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
The scalar stored in shared memory.
Definition: gemm_traits.h:285
+
GlobalLoadStreamB_::Scalar ScalarB
The scalar for B.
Definition: gemm_traits.h:446
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
The shared storage for A.
Definition: gemm_traits.h:454
+
GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
The stream to load A from global memory to shared memory.
Definition: gemm_traits.h:668
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Definition: gemm_traits.h:428
+
MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
The number of accumulators per warp.
Definition: gemm_traits.h:97
+
SharedLoadStreamA::TransformedFragment transformed_a[2]
The fragments to transform A.
Definition: gemm_traits.h:630
+
SharedLoadStream_::SharedStorage shared
Definition: gemm_traits.h:529
+
GlobalLoadStreamB::Params global_stream_b
The params for the B stream.
Definition: gemm_traits.h:487
+
SharedLoadStreamB::FetchedFragment fetched_b[2]
The fragments to fetch B.
Definition: gemm_traits.h:634
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
static int const kScalarsPerLdgC
The number of scalars per LDG for C.
Definition: gemm_traits.h:119
+
ScalarD_ ScalarD
The scalar for D.
Definition: gemm_traits.h:88
+
static int const kThreads
The number of threads.
Definition: gemm_traits.h:106
+
Defines functors for mapping blockIdx to partitions of the GEMM computation.
+
Index m
The dimensions of the GEMM.
Definition: gemm_traits.h:483
+
BlockSwizzle_ BlockSwizzle
The block swizzle to reorganize the grid.
Definition: gemm_traits.h:474
+
TileLoadIterator< typename GemmTileTraitsHelperA_::SharedLoadTileTraits, typename GemmTileTraitsHelperA_::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
The iterator to load A from shared memory.
Definition: gemm_traits.h:690
+
Definition: matrix_traits.h:36
+
TileLoadIterator< typename GemmTileTraitsHelperB_::SharedLoadTileTraits, typename GemmTileTraitsHelperB_::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
The iterator to load B from shared memory.
Definition: gemm_traits.h:698
+
CUTLASS_DEVICE SharedLoadStream(Params const &params, SharedStorage &shared_storage)
Ctor.
Definition: gemm_traits.h:592
+
CUTLASS_DEVICE GlobalLoadStream(Params const &params, SharedStorage &shared_storage, dim3 const &block)
Ctor.
Definition: gemm_traits.h:553
+
GlobalLoadIteratorC::Scalar ScalarC
The scalar for C.
Definition: gemm_epilogue.h:96
+
Index_ Index
The index.
Definition: gemm_traits.h:476
+
GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
The scalar stored in shared memory.
Definition: gemm_traits.h:149
+
TileStoreIterator< typename GemmTileTraitsHelperB_::SharedStoreTileTraits, typename GemmTileTraitsHelperB_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
The iterator to store B to shared memory.
Definition: gemm_traits.h:680
+
Epilogue::Params epilogue
The params for the epilogue.
Definition: gemm_traits.h:493
+
Kind
Definition: matrix_traits.h:36
+
GlobalLoadStreamA::Params global_stream_a
The params for the A stream.
Definition: gemm_traits.h:485
+
The shared storage.
Definition: clear_accumulators.h:40
+
CUTLASS_DEVICE void commit(int step)
Commit the data.
Definition: gemm_traits.h:604
+
static int const kScalarsPerLdsD
Definition: gemm_traits.h:124
+
Implements efficient loading of the thread block-level tile from global memory and storing to shared ...
+
MainLoopSharedStorage main_loop
Definition: gemm_traits.h:545
+
static MatrixLayout::Kind const kLayoutA
The layout of A.
Definition: gemm_traits.h:437
+
OutputTile_ OutputTile
The tile.
Definition: gemm_traits.h:91
+
static int const kScalarsPerLdgB
The number of scalars per LDG/STS/LDS for B.
Definition: gemm_traits.h:114
+
Definition: matrix_traits.h:43
+
Definition: gemm_traits.h:654
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
GemmGlobalIteratorAb< typename GemmTileTraitsHelperA_::GlobalTileTraits, Index_ > GlobalLoadIteratorA
The global iterator to load A from global memory.
Definition: gemm_traits.h:657
+
GemmConfig::OutputTile OutputTile
The output tile.
Definition: gemm_traits.h:432
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
Copy< typename GlobalLoadIteratorA::Fragment > GlobalTransformerA
The data converter for A before storing to shared memory.
Definition: gemm_traits.h:659
+
CUTLASS_DEVICE void commit()
Commit the data.
Definition: gemm_traits.h:572
+
GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, 0 > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for B^T.
Definition: gemm_traits.h:403
+
ClearAccumulators::SharedStorage clear
Definition: gemm_traits.h:539
+
StreamSharedStorage< GlobalLoadStreamA, SharedLoadStreamA > stream_a
Definition: gemm_traits.h:535
+
GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
The traits class to build the iterator to load data from global memory for B^T.
Definition: gemm_traits.h:369
+
Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEM...
+
Compute derived counts of a Layout Concept based class.
Definition: shape.h:79
+
Defines conversion operations among Fragments of different base type.
+
SharedLoadStreamB::Params shared_stream_b
The params for the B stream from shared memory.
Definition: gemm_traits.h:491
+
Definition: gemm_traits.h:723
+
CUTLASS_DEVICE SharedLoadStreamA::Fragment const & fragment_a(int step) const
The fragment A.
Definition: gemm_traits.h:610
+
static MatrixLayout::Kind const kLayoutB
The layout of B.
Definition: gemm_traits.h:444
+
static int const kAccumulatorsPerLdsB
Definition: gemm_traits.h:128
+
static int const kStages
The number of stages in shared memory to implement double, triple, more-buffering.
Definition: gemm_traits.h:131
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:620
+
ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
The number of accumulators per warp.
Definition: thread_multiply_add.h:51
+
GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
The scalar stored in shared memory.
Definition: gemm_traits.h:212
+
+ + + + diff --git a/docs/generated-html/globals.html b/docs/generated-html/globals.html new file mode 100644 index 00000000..ddd387b2 --- /dev/null +++ b/docs/generated-html/globals.html @@ -0,0 +1,147 @@ + + + + + + + +Cutlass: File Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all file members with links to the files they belong to:
+
+ + + + diff --git a/docs/generated-html/globals_defs.html b/docs/generated-html/globals_defs.html new file mode 100644 index 00000000..d1df12cb --- /dev/null +++ b/docs/generated-html/globals_defs.html @@ -0,0 +1,144 @@ + + + + + + + +Cutlass: File Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+ + + + diff --git a/docs/generated-html/globals_func.html b/docs/generated-html/globals_func.html new file mode 100644 index 00000000..6f910b76 --- /dev/null +++ b/docs/generated-html/globals_func.html @@ -0,0 +1,84 @@ + + + + + + + +Cutlass: File Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+ + + + diff --git a/docs/generated-html/group__fragment__concept.html b/docs/generated-html/group__fragment__concept.html new file mode 100644 index 00000000..85e35727 --- /dev/null +++ b/docs/generated-html/group__fragment__concept.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Fragment Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Fragment Concept
+
+
+ + + + + +

+Classes

struct  cutlass::Fragment< Element_, kElements_, kAlignment_ >
 A template defining Fragment Concept. More...
 
+

Detailed Description

+

Fragment Concept is a statically sized array for storing parts of tiles held by individual CUDA threads.

+
fragment_concept
Types satisfying Fragment Concept define the following members
    +
  • Element - type of each access held within the fragment
  • +
  • kElements - number of elements stored by the fragment
  • +
  • clear() - overwrites the fragment storage with zeros
  • +
  • Element & operator[](int i) - by-reference access of the ith element
  • +
  • Element const & operator[](int i) const - const by-reference access of the ith element
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__fragment__iterator__concept.html b/docs/generated-html/group__fragment__iterator__concept.html new file mode 100644 index 00000000..dc89e72e --- /dev/null +++ b/docs/generated-html/group__fragment__iterator__concept.html @@ -0,0 +1,99 @@ + + + + + + + +Cutlass: Fragment Iterator Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Fragment Iterator Concept
+
+
+ + + + + +

+Classes

struct  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
 A template defining Fragment Iterator Concept. More...
 
+

Detailed Description

+

Fragment Iterator Concept provides structured access to the elements within a fragment with an optional bitcast to the desired access type

+
fragment_iterator_concept
Types satisfying Fragment Iterator Concept define the following members
    +
  • AccessType& operator[](int i) - provides access to the ith element of the fragment
  • +
  • AccessType& at(int d, int h, int w, int c) - applies Layout Concept to fragment and provides access to element at (d, h, w, c)
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__layout__concept.html b/docs/generated-html/group__layout__concept.html new file mode 100644 index 00000000..3fe8532c --- /dev/null +++ b/docs/generated-html/group__layout__concept.html @@ -0,0 +1,108 @@ + + + + + + + +Cutlass: Layout Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Layout Concept
+
+
+ + + + + +

+Classes

struct  cutlass::Shape< kD_, kH_, kW_, kC_ >
 A Shape implementing Layout Concept describing the dimensions of a cube. More...
 
+

Detailed Description

+
Implementations of layout_concept are used to describe a cube with DxHxW elements and C
scalars per element. A HxW slice of a cube is called an image and a cube consists of D images.
+
Notations
Let Layout be an implementation of the Layout Concept.
+
Valid Expressions
    +
  • Layout::D specifies the depth of a cube
  • +
  • Layout::H specifies the height of a cube
  • +
  • Layout::W specifies the width of a cube
  • +
  • Layout::C specifies the number of channels of each element in a cube
  • +
  • Layout::W_c specifies the number of scalars of each row in one image of a cube.
  • +
  • Layout::H_w specifies the number of elements in an image slice.
  • +
  • Layout::H_w_c specifies the number of scalars in an image slice.
  • +
  • Layout::D_h_w specifies the number of elements in a cube.
  • +
  • Layout::D_h_w_c specifies the number of scalars in a cube.
  • +
  • Layout::Strides is a Layout Concept specifying the strides.
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__predicate__iterator__concept.html b/docs/generated-html/group__predicate__iterator__concept.html new file mode 100644 index 00000000..95c1ef2e --- /dev/null +++ b/docs/generated-html/group__predicate__iterator__concept.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Predicate Iterator Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Predicate Iterator Concept
+
+
+ + + + + + + + +

+Classes

class  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator
 A const iterator implementing Predicate Iterator Concept enabling sequential read-only access to predicates. More...
 
class  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator
 An iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates. More...
 
+

Detailed Description

+

Implementations of Predicate Iterator Concept enable accessing and traversing elements of a bit vector.

+
Const Predicate Iterator
A const Predicate Iterator Concept satisfies the following expressions
    +
  • ++it increments the iterator to the next predicate
  • +
  • *it returns the value of the currently pointed-to predicate
  • +
+
+
Mutable Predicate Iterator
A Predicate Iterator Concept that is non-const also satisfies the following expressions
    +
  • it.set(bool value) sets the value of the currently pointed-to predicate
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__predicate__tile__adapter.html b/docs/generated-html/group__predicate__tile__adapter.html new file mode 100644 index 00000000..a4b80992 --- /dev/null +++ b/docs/generated-html/group__predicate__tile__adapter.html @@ -0,0 +1,88 @@ + + + + + + + +Cutlass: Predicate Tile Adapter Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Predicate Tile Adapter Concept
+
+
+

Implementations of Predicate Tile Adapter Concept provide a mapping between the elements of a Tile Traits Concept and a Predicate Vector Concept.

+
Predicate Tile Adapter
A Predicate Tile Adapter Concept satisfies the following expressions
    +
  • at(int d, int h, int w, int c) - returns the value of a predicate corresponding to the access (d, h, w, c) within the tile.
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__predicate__vector__concept.html b/docs/generated-html/group__predicate__vector__concept.html new file mode 100644 index 00000000..5147870e --- /dev/null +++ b/docs/generated-html/group__predicate__vector__concept.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: Predicate Vector Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Predicate Vector Concept
+
+
+ + + + + +

+Classes

struct  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >
 Statically sized array of bits implementing Predicate Vector Concept. More...
 
+

Detailed Description

+

Implementations of Predicate Vector Concept contain an ordered set of boolean predicates which may be used as conditionals in other device-side operations. Both random access and iterators offering sequential access are provided.

+
Predicate Vector
A Predicate Vector Concept satisfies the following expressions
    +
  • at(int idx) - returns the value of the indexed predicate
  • +
  • set(int idx, bool value) - sets the value of the indexed predicate
  • +
  • begin() - returns a Predicate Iterator Concept pointing to the first predicate
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__tile__load__iterator__concept.html b/docs/generated-html/group__tile__load__iterator__concept.html new file mode 100644 index 00000000..2bc4b4e3 --- /dev/null +++ b/docs/generated-html/group__tile__load__iterator__concept.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Tile Load Iterator Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Tile Load Iterator Concept
+
+
+ + + + + +

+Classes

struct  cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
 An iterator implementing Tile Load Iterator Concept for loading a tile from memory. More...
 
+

Detailed Description

+

Tile Load Iterator Concept enables loading a tile from addressable memory into a fragment

+
Tile Load Iterator Concept
Types satisfying Tile Load Iterator Concept define the following members
    +
  • PredicateVector - a Predicate Vector Concept with sufficient predicate storage for each access implied by the tile traits
  • +
  • Fragment - the destination fragment type satisfying Fragment Concept
  • +
  • initialize_predicates(pred_it, bounds, block_offset) - function initializing a predicate vector according to externally specified bounds
  • +
  • load_post_increment(fragment, pred_it) - a method that loads a fragment and increments the iterator to the next tile, guarded by a Predicate Iterator Concept
  • +
  • load_post_increment(fragment) - a method that loads a fragment and increments the iterator to the next tile
  • +
  • load(fragment, pred_it) - a const method that loads a fragment, guarded by a Predicate Iterator Concept
  • +
  • load(fragment) - a method that loads a fragment
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__tile__store__iterator__concept.html b/docs/generated-html/group__tile__store__iterator__concept.html new file mode 100644 index 00000000..bde54053 --- /dev/null +++ b/docs/generated-html/group__tile__store__iterator__concept.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Tile Store Iterator Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Tile Store Iterator Concept
+
+
+ + + + + +

+Classes

struct  cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
 An iterator implementing Tile Store Iterator Concept for storing a tile to memory. More...
 
+

Detailed Description

+

Tile Store Iterator Concept enables storing a tile to addressable memory

+
Tile Store Iterator Concept
Types satisfying Tile Store Iterator Concept define the following members
    +
  • PredicateVector - a Predicate Vector Concept with sufficient predicate storage for each access implied by the tile traits
  • +
  • Fragment - the destination fragment type satisfying Fragment Concept
  • +
  • initialize_predicates(pred_it, bounds, block_offset) - function initializing a predicate vector according to externally specified bounds
  • +
  • store_post_increment(fragment, pred_it) - a method that stores a fragment and increments the iterator to the next tile, guarded by a Predicate Iterator Concept
  • +
  • store_post_increment(fragment) - a method that stores a fragment and increments the iterator to the next tile
  • +
  • store(fragment, pred_it) - a const method that stores a fragment, guarded by a Predicate Iterator Concept
  • +
  • store(fragment) - a method that stores a fragment
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__tile__traits__concept.html b/docs/generated-html/group__tile__traits__concept.html new file mode 100644 index 00000000..16e4bd8a --- /dev/null +++ b/docs/generated-html/group__tile__traits__concept.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: Tile Traits Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Tile Traits Concept
+
+
+ + + + + +

+Classes

struct  cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >
 A template defining Tile Traits Concept. More...
 
+

Detailed Description

+

Tile Traits Concept is a type defining the shape of a tile and the distribution of accesses by individual entities, either threads or other.

+
Tile Traits Concept
Types satisfying Tile Traits Concept define the following members
    +
  • Tile - a type satisfying Layout Concept describing the dimensions of the tile
  • +
  • Delta - a type satisfying Layout Concept describing the increments between accesses along each dimension
  • +
  • Iterations - a type satisfying Layout Concept describing the number of accesses along each dimension
  • +
  • Offset - the type of a functor computing the offset of each participating entity as a Coord<4>.
  • +
+
+
+ + + + diff --git a/docs/generated-html/hgemm__global__tile_8h.html b/docs/generated-html/hgemm__global__tile_8h.html new file mode 100644 index 00000000..b62b8c14 --- /dev/null +++ b/docs/generated-html/hgemm__global__tile_8h.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: hgemm_global_tile.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
hgemm_global_tile.h File Reference
+
+
+ +

Tile traits used to construct global tile iterator for HGEMM. This is intended to partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate memory accesses larger than 16 bits. +More...

+ +

Go to the source code of this file.

+ + + + + + + +

+Classes

struct  cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
 
struct  cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/hgemm__global__tile_8h_source.html b/docs/generated-html/hgemm__global__tile_8h_source.html new file mode 100644 index 00000000..bdd647d1 --- /dev/null +++ b/docs/generated-html/hgemm__global__tile_8h_source.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: hgemm_global_tile.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
hgemm_global_tile.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
30 #pragma once
31 
32 #include <cutlass/coord.h>
34 #include <cutlass/matrix_traits.h>
35 #include <cutlass/reshape_tile.h>
36 
37 namespace cutlass {
38 namespace gemm {
39 
41 
42 template <GemmOperand::Kind kOperand_,
43  MatrixLayout::Kind kLayout_,
44  typename Scalar_,
45  typename Tile_,
46  typename Threads_,
47  int kAccessSize_>
49  // Which GEMM operand?
50  kOperand_,
51  // The layout.
52  kLayout_,
53  // The scalar.
54  Scalar_,
55  // The tile.
56  Tile_,
57  // The threads.
58  Threads_,
59  // The number of scalars per LDG/STG.
60  kAccessSize_> {
64  typedef typename Base::Threads Threads;
70  typedef Shape<Base::Tile::kH / Base::Threads::kH / 2,
71  2,
72  Base::Tile::kW / Base::Threads::kW,
73  Base::Tile::kC / Base::kAccessSize>
76  struct ThreadOffset {
78  Coord<4> operator()() const {
79  int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH;
80  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
81 
82  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
83  }
84  };
85 };
86 
88 
89 } // namespace gemm
90 } // namespace cutlass
Definition: convert.h:33
+
Defines iterators for efficiently loading and storing to global memory.
+
Definition: gemm_global_tile.h:70
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
Shape< Base::Tile::kH/Base::Threads::kH/2, 2, Base::Tile::kW/Base::Threads::kW, Base::Tile::kC/Base::kAccessSize > Iterations
The number of iterations needed to load/store the tile.
Definition: hgemm_global_tile.h:74
+
Base::Threads Threads
The threads.
Definition: hgemm_global_tile.h:64
+
static int const kH
The height of the cube.
Definition: shape.h:68
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: hgemm_global_tile.h:78
+
Shape< Base::Threads::kH *2, 1, Base::Threads::kW, Base::kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: hgemm_global_tile.h:68
+
Shape< 1, 2, Base::Tile::kC > ThreadsDelta
The threads strides.
Definition: hgemm_global_tile.h:66
+
Defines a type for restructuring a tile.
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Base
The base class.
Definition: hgemm_global_tile.h:62
+
Definition: hgemm_global_tile.h:48
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+ +
static int const kW
The width of the cube.
Definition: shape.h:70
+
Kind
Definition: matrix_traits.h:36
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_global_tile.h:80
+
Computes the thread offset in (H, W) based on thread ID.
Definition: hgemm_global_tile.h:76
+
Kind
Definition: matrix_traits.h:43
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
+ + + + diff --git a/docs/generated-html/hgemm__multiply__add_8h.html b/docs/generated-html/hgemm__multiply__add_8h.html new file mode 100644 index 00000000..3c6c609e --- /dev/null +++ b/docs/generated-html/hgemm__multiply__add_8h.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: hgemm_multiply_add.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
hgemm_multiply_add.h File Reference
+
+
+ +

Specialization implementing multiply-add operation on half-precision floating point fragments. +More...

+ +

Go to the source code of this file.

+ + + + + +

+Classes

struct  cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
 Template performing matrix multiply-add operation within a thread. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/hgemm__multiply__add_8h_source.html b/docs/generated-html/hgemm__multiply__add_8h_source.html new file mode 100644 index 00000000..73ef9040 --- /dev/null +++ b/docs/generated-html/hgemm__multiply__add_8h_source.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: hgemm_multiply_add.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
hgemm_multiply_add.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/fragment.h>
32 
34 
35 namespace cutlass {
36 namespace gemm {
37 
39 
41 template <typename AccumulatorsPerThread_, typename ThreadsPerWarp_>
42 struct ThreadMultiplyAdd<AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half> {
46  typedef AccumulatorsPerThread_ AccumulatorsPerThread;
48  typedef ThreadsPerWarp_ ThreadsPerWarp;
52  typedef half ScalarA;
56  typedef half ScalarB;
60  typedef half ScalarC;
63 
65  static_assert(AccumulatorsPerThread::kH % 2 == 0, "Invalid size");
66  static_assert(AccumulatorsPerThread::kW % 2 == 0, "Invalid size");
67 
69  CUTLASS_DEVICE ThreadMultiplyAdd() {}
70 
72  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
73  FragmentB const& b,
74  Accumulators const& c,
75  Accumulators& d) {
76 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
77  // The inputs.
78  __half2 const* a_half2 = reinterpret_cast<__half2 const*>(&a[0]);
79  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
80  __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]);
81 
82  // The output.
83  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
84 
85  for (int j = 0; j < AccumulatorsPerThread::kH / 2; ++j) {
86  for (int i = 0; i < AccumulatorsPerThread::kW / 2; ++i) {
87  // The offsets in the output fragment.
88  int const k0 = (2 * j + 0) * (AccumulatorsPerThread::kW / 2) + i;
89  int const k1 = (2 * j + 1) * (AccumulatorsPerThread::kW / 2) + i;
90 
91  // Compute the product a[i] * b[j].H0_H0.
92  d_half2[k0] = __hfma2(a_half2[i], __low2half2(b_half2[j]), c_half2[k0]);
93  // Compute the product a[i] * b[j].H1_H1.
94  d_half2[k1] = __hfma2(a_half2[i], __high2half2(b_half2[j]), c_half2[k1]);
95  }
96  }
97 #endif
98  }
99 };
100 
102 
103 } // namespace gemm
104 } // namespace cutlass
+
Definition: convert.h:33
+
Fragment< half, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
The accumulators.
Definition: hgemm_multiply_add.h:62
+
ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
The number of accumulators per warp.
Definition: hgemm_multiply_add.h:50
+
half ScalarC
The type for C and D.
Definition: hgemm_multiply_add.h:60
+
CUTLASS_DEVICE ThreadMultiplyAdd()
Make sure there&#39;s an even number of elements in both dimensions.
Definition: hgemm_multiply_add.h:69
+
Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
Definition: shape.h:119
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
Template implementing matrix multiply-add operations on fragments.
+
Shape< 1, 1, 2, 1 > InstructionShape
The shape of the instruction.
Definition: hgemm_multiply_add.h:44
+ +
ThreadsPerWarp_ ThreadsPerWarp
The number of threads per warp.
Definition: hgemm_multiply_add.h:48
+
AccumulatorsPerThread_ AccumulatorsPerThread
The number of accumulators per thread.
Definition: hgemm_multiply_add.h:46
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
CUTLASS_DEVICE void multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
Multiply : d = a*b + c.
Definition: hgemm_multiply_add.h:72
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Template performing matrix multiply-add operation within a thread.
Definition: thread_multiply_add.h:43
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
The fragment for A.
Definition: hgemm_multiply_add.h:54
+
Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
The fragment for B.
Definition: hgemm_multiply_add.h:58
+
+ + + + diff --git a/docs/generated-html/hgemm__swizzle_8h.html b/docs/generated-html/hgemm__swizzle_8h.html new file mode 100644 index 00000000..aef7ac75 --- /dev/null +++ b/docs/generated-html/hgemm__swizzle_8h.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: hgemm_swizzle.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
hgemm_swizzle.h File Reference
+
+
+ +

Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for multiplicands. +More...

+
#include <cuda_fp16.h>
+#include <cutlass/fragment.h>
+
+

Go to the source code of this file.

+ + + + +

+Classes

struct  cutlass::gemm::HgemmSwizzle< GlobalIterator_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/hgemm__swizzle_8h_source.html b/docs/generated-html/hgemm__swizzle_8h_source.html new file mode 100644 index 00000000..bb76b510 --- /dev/null +++ b/docs/generated-html/hgemm__swizzle_8h_source.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: hgemm_swizzle.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
hgemm_swizzle.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cuda_fp16.h>
32 #include <cutlass/fragment.h>
33 
34 namespace cutlass {
35 namespace gemm {
36 
38 
39 template <typename GlobalIterator_>
40 struct HgemmSwizzle {
42  typedef GlobalIterator_ GlobalIterator;
44  typedef typename GlobalIterator::Fragment Fragment;
46  typedef typename GlobalIterator::FragmentShape FragmentShape;
47 
52 
55 
57  static_assert(FragmentShape::kH == 2 && ShapeCount<FragmentShape>::kWc == 2, "Not multiple of 2");
58 
60  CUTLASS_DEVICE HgemmSwizzle() {}
61 
63  CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) {
64  // Expose src/dst as int arrays.
65  int const* src_int = reinterpret_cast<int const*>(&src[0]);
66  int* dst_int = reinterpret_cast<int*>(&dst[0]);
67 
68  // Transpose the data.
69  for (int d = 0; d < FragmentShape::kD; ++d) {
70  // The indices to read two consecutive "rows".
71  int const i0 = 2 * d + 0;
72  int const i1 = 2 * d + 1;
73 
74  int a0 = src_int[i0];
75  int a1 = src_int[i1];
76 
77  int b0, b1;
78  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(a0), "r"(a1));
79  asm volatile("prmt.b32 %0, %1, %2, 0x7632;" : "=r"(b1) : "r"(a0), "r"(a1));
80 
81  // The indices to store with "strides".
82  int const j0 = 0 * (ShapeCount<FragmentShape>::kDhw / 2) + d;
83  int const j1 = 1 * (ShapeCount<FragmentShape>::kDhw / 2) + d;
84 
85  dst_int[j0] = b0;
86  dst_int[j1] = b1;
87  }
88  }
89 };
90 
92 
93 } // namespace gemm
94 } // namespace cutlass
GlobalIterator_ GlobalIterator
The global iterator.
Definition: hgemm_swizzle.h:42
+
Definition: convert.h:33
+
std::is_same (false specialization)
Definition: platform.h:412
+
CUTLASS_DEVICE HgemmSwizzle()
The src/dst must be half fragments.
Definition: hgemm_swizzle.h:60
+
CUTLASS_DEVICE void transform(Fragment const &src, Fragment &dst)
Transform a fragment.
Definition: hgemm_swizzle.h:63
+
Fragment InputFragment
The input fragment.
Definition: hgemm_swizzle.h:49
+
Fragment OutputFragment
The output fragment.
Definition: hgemm_swizzle.h:51
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
GlobalIterator::Fragment Fragment
The source fragment.
Definition: hgemm_swizzle.h:44
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
GlobalIterator::FragmentShape FragmentShape
The shape of the source fragment.
Definition: hgemm_swizzle.h:46
+
Computes derived counts of a Layout Concept based class.
Definition: shape.h:79
+
Definition: hgemm_swizzle.h:40
+
+ + + + diff --git a/docs/generated-html/hgemm__traits_8h.html b/docs/generated-html/hgemm__traits_8h.html new file mode 100644 index 00000000..283ceb75 --- /dev/null +++ b/docs/generated-html/hgemm__traits_8h.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: hgemm_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
hgemm_traits.h File Reference
+
+
+ +

Defines structural properties of half-precision GEMM computation. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >
 
struct  cutlass::gemm::HgemmTransformerA< kLayout_, Iterator_ >
 
struct  cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
 
struct  cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
 
struct  cutlass::gemm::HgemmTransformerB< kLayout_, Iterator_ >
 
struct  cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
 
struct  cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
 
struct  cutlass::gemm::HgemmTileTraitsHelperA< kLayout_, GemmConfig_ >
 
struct  cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
 
struct  cutlass::gemm::HgemmTileTraitsHelperB< kLayout_, GemmConfig_ >
 
struct  cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
 
struct  cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
 
struct  cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/hgemm__traits_8h_source.html b/docs/generated-html/hgemm__traits_8h_source.html new file mode 100644 index 00000000..0d12493e --- /dev/null +++ b/docs/generated-html/hgemm__traits_8h_source.html @@ -0,0 +1,166 @@ + + + + + + + +Cutlass: hgemm_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
hgemm_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/convert.h>
31 #include <cutlass/reshape_tile.h>
32 
33 #include <cutlass/gemm/gemm.h>
42 
43 namespace cutlass {
44 namespace gemm {
45 
47 
48 template <
50  typename OutputTile_,
52  typename AccumulatorsPerThread_,
54  int kScalarsPerLdgA_ = 2,
56  int kScalarsPerLdgB_ = 2>
58  : public GemmConfig<
60  half,
62  half,
64  half,
66  half,
68  OutputTile_,
70  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, half, half, half>,
72  kScalarsPerLdgA_,
74  kScalarsPerLdgA_,
76  8,
78  kScalarsPerLdgB_,
80  kScalarsPerLdgB_,
82  8,
84  2,
86  8,
88  2,
90  2> {};
91 
93 
94 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
96 
97 template <typename Iterator_>
98 struct HgemmTransformerA<MatrixLayout::kColumnMajor, Iterator_> {
100 };
101 
102 template <typename Iterator_>
103 struct HgemmTransformerA<MatrixLayout::kRowMajor, Iterator_> {
105 };
106 
108 
109 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
111 
112 template <typename Iterator_>
113 struct HgemmTransformerB<MatrixLayout::kRowMajor, Iterator_> {
115 };
116 
117 template <typename Iterator_>
118 struct HgemmTransformerB<MatrixLayout::kColumnMajor, Iterator_> {
120 };
121 
123 
124 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
125 struct HgemmTileTraitsHelperA : public GemmTileTraitsHelperA<kLayout_, GemmConfig_> {};
126 
128 
129 template <typename GemmConfig_>
130 struct HgemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_>
131  : public GemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_> {
134 
138  // The layout.
140  // The pointer.
141  half const,
142  // The tile has size MxK in GEMM's terminology.
144  // The threads are distributed as (threads / K ) x K (the traits may reorganize).
145  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
146  // The number of scalars per LDG (LDG.32 or LDG.128, etc)
147  GemmConfig_::kScalarsPerLdgA>
149 
152  // The pointer.
153  half,
154  // The tile has size KxM in GEMM's terminology.
155  Shape<GemmConfig_::kStages,
156  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
157  GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>,
158  // The threads are distributed as warps x 32(the traits may reorganize).
159  typename GlobalTileTraits::Threads,
160  // The number of scalars per STS (STS.32 or STS.128, etc).
161  2,
162  // The skew to avoid bank conflicts added in the tile W dimension.
163  128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2>
165 
168  // The pointer.
169  half const,
170  // The output tile size.
171  typename GemmConfig_::OutputTile,
172  // The number of warps.
173  typename GemmConfig_::Warps,
174  // The number of threads per warp.
175  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
176  // The shape of the FMA instruction.
177  typename GemmConfig_::InstructionShape,
178  // The number of stages.
179  GemmConfig_::kStages,
180  // The number of scalars per LDS.
181  8,
182  // The skew.
183  SharedStoreTileTraits::kSkew>
185 };
186 
188 
189 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
190 struct HgemmTileTraitsHelperB : public GemmTileTraitsHelperB<kLayout_, GemmConfig_> {};
191 
193 
194 template <typename GemmConfig_>
195 struct HgemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_>
196  : public GemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_> {
199 
203  // The layout.
205  // The pointer.
206  half const,
207  // The tile has size KxN in GEMM's terminology.
209  // The threads are distributed as (threads / K) x K (the traits may reorganize).
210  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
211  // The number of scalars per LDG (LDG.32 or LDG.128, etc)
212  GemmConfig_::kScalarsPerLdgB>
214 
217  // The pointer.
218  half,
219  // The tile has size KxN in GEMM's terminology.
220  Shape<GemmConfig_::kStages,
221  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
222  GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>,
223  // The threads are distributed as (threads / K) x K (the traits may reorganize).
224  typename GlobalTileTraits::Threads,
225  // The number of scalars per STS (STS.32 or STS.128, etc).
226  2,
227  // The skew to avoid bank conflicts added in the tile W dimension.
228  128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2>
230 
233  // The pointer.
234  half const,
235  // The output tile size.
236  typename GemmConfig_::OutputTile,
237  // The number of warps.
238  typename GemmConfig_::Warps,
239  // The number of threads per warp.
240  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
241  // The shape of the FMA instruction.
242  typename GemmConfig_::InstructionShape,
243  // The number of stages.
244  GemmConfig_::kStages,
245  // The number of scalars per LDS.
246  8,
247  // The skew.
248  SharedStoreTileTraits::kSkew>
250 };
251 
253 
254 template <
256  MatrixLayout::Kind kLayoutA_,
258  MatrixLayout::Kind kLayoutB_,
260  typename OutputTile_,
262  typename EpilogueFunctor_,
264  typename AccumulatorsPerThread_ = Shape<32, 8, 8>,
266  int kScalarsPerLdgA_ = 2,
268  int kScalarsPerLdgB_ = 2,
270  typename Index_ = int>
279 
284  typedef typename HgemmTransformerA<GemmTileTraitsHelperA::kLayout,
287  typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits,
288  typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar,
295 
299  // The default transformer for B.
300  typedef typename HgemmTransformerB<GemmTileTraitsHelperB::kLayout,
303  typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits,
304  typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar,
311 
313  typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits,
314  typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar,
321  typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits,
322  typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar,
328 
333 
338 };
339 
341 
342 template <
344  MatrixLayout::Kind kLayoutA_,
346  MatrixLayout::Kind kLayoutB_,
348  typename OutputTile_ = Shape<8, 128, 128>,
350  typename EpilogueFunctor_ = LinearScaling<half>,
352  typename AccumulatorsPerThread_ = Shape<8, 8, 16>,
354  int kScalarsPerLdgA_ = 2,
356  int kScalarsPerLdgB_ = 2,
358  typename Index_ = int,
360  typename Helper_ = HgemmTraitsHelper<kLayoutA_,
361  kLayoutB_,
362  OutputTile_,
363  EpilogueFunctor_,
364  AccumulatorsPerThread_,
365  kScalarsPerLdgA_,
366  kScalarsPerLdgB_,
367  Index_> >
368 struct HgemmTraits : public GemmTraits<
369  // The config.
370  typename Helper_::GemmConfig,
371  // The stream to load A from global memory to shared memory.
372  typename Helper_::GlobalLoadStreamA,
373  // The stream to load B from global memory to shared memory.
374  typename Helper_::GlobalLoadStreamB,
375  // The stream to load A from shared memory.
376  typename Helper_::SharedLoadStreamA,
377  // The stream to load B from shared memory.
378  typename Helper_::SharedLoadStreamB,
379  // The epilogue.
380  typename Helper_::Epilogue,
381  // The block swizzle to reorganize the grid.
382  IdentityBlockSwizzle,
383  // The index.
384  Index_,
385  // The tool used to clear accumulators.
386  typename Helper_::ClearAccumulators> {};
387 
389 
390 } // namespace gemm
391 } // namespace cutlass
GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
The iterator to load A from global memory.
Definition: hgemm_traits.h:282
+
Definition: load_store.h:42
+
HgemmSwizzle< Iterator_ > Transformer
Definition: hgemm_traits.h:119
+
Definition: convert.h:33
+
Definition: gemm_shared_tile.h:129
+ +
Definition: gemm_epilogue.h:53
+
Defines iterators for efficiently loading and storing to global memory.
+
GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
The iterator to load B from global memory.
Definition: hgemm_traits.h:298
+
ClearAccumulators< typename MultiplyAdd::ScalarC > ClearAccumulators
The object to clear accumulators.
Definition: hgemm_traits.h:332
+
Defines structural properties of complete GEMM computation.
+
TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
The iterator to store A to shared memory.
Definition: hgemm_traits.h:291
+
GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
The stream to load A from global memory to shared memory.
Definition: hgemm_traits.h:294
+
HgemmCrosswiseGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, half const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
The traits class to build the iterator to load data from global memory for B^N.
Definition: hgemm_traits.h:213
+
Definition: hgemm_traits.h:95
+
GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Base
The base config.
Definition: hgemm_traits.h:198
+
SharedLoadStream< SharedLoadIteratorA > SharedLoadStreamA
The stream to load A from shared memory.
Definition: hgemm_traits.h:319
+
Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
Definition: hgemm_traits.h:99
+
Definition: hgemm_traits.h:368
+
HgemmSwizzle< Iterator_ > Transformer
Definition: hgemm_traits.h:104
+
Definition: tile_iterator.h:62
+
Definition: gemm_shared_tile.h:198
+
TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
The iterator to load B from shared memory.
Definition: hgemm_traits.h:325
+
Definition: gemm_global_tile.h:159
+
GemmEpilogue< GemmEpilogueTraits > Epilogue
The epilogue.
Definition: hgemm_traits.h:337
+
HgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
The default transformer for A.
Definition: hgemm_traits.h:285
+
Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
+
Definition: gemm_global_stream.h:161
+
Definition: gemm_traits.h:273
+
Definition: hgemm_traits.h:125
+
Describes layouts of matrices.
Definition: matrix_traits.h:35
+
SharedLoadStream< SharedLoadIteratorB > SharedLoadStreamB
The stream to load B from shared memory.
Definition: hgemm_traits.h:327
+
Definition: hgemm_traits.h:110
+
GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Base
The base config.
Definition: hgemm_traits.h:133
+
TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
The iterator to load A from shared memory.
Definition: hgemm_traits.h:317
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+
SimplifiedGemmEpilogueTraits< GemmConfig, EpilogueFunctor_, Index_ > GemmEpilogueTraits
The traits class for the epilogue.
Definition: hgemm_traits.h:335
+
Defines iterators for efficiently loading and storing tiles to and from shared memory.
+
Definition: matrix_traits.h:36
+ +
Definition: gemm_shared_stream.h:44
+
Defines a type for restructuring a tile.
+
Specialization implementing multiply-add operation on half-precision floating point fragments...
+
Definition: gemm_traits.h:79
+
Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for...
+
Definition: gemm_traits.h:137
+
GemmSharedLoadTileBTraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for B^N.
Definition: hgemm_traits.h:249
+
Definition: matrix_traits.h:43
+
HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > GemmConfig
The HGEMM config.
Definition: hgemm_traits.h:274
+
Definition: hgemm_traits.h:190
+
GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
The stream to load B from global memory to shared memory.
Definition: hgemm_traits.h:310
+
GemmConfig::MultiplyAdd MultiplyAdd
The functor to do the multiply-add in the main loop.
Definition: hgemm_traits.h:330
+
HgemmTileTraitsHelperB< kLayoutB_, GemmConfig > GemmTileTraitsHelperB
The GEMM config for B.
Definition: hgemm_traits.h:278
+
Definition: gemm_traits.h:428
+
Definition: hgemm_global_tile.h:48
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Definition: gemm_epilogue_traits.h:300
+
GemmSharedLoadTileATraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for A^T.
Definition: hgemm_traits.h:184
+
HgemmTileTraitsHelperA< kLayoutA_, GemmConfig > GemmTileTraitsHelperA
The GEMM config for A.
Definition: hgemm_traits.h:276
+
Template performing matrix multiply-add operation within a thread.
Definition: thread_multiply_add.h:43
+
Definition: matrix_traits.h:36
+
Kind
Definition: matrix_traits.h:36
+
HgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
Definition: hgemm_traits.h:301
+ +
Definition: hgemm_traits.h:271
+
HgemmCrosswiseGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, half const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
The traits class to build the iterator to load data from global memory for A^T.
Definition: hgemm_traits.h:148
+
Tile traits used to construct global tile iterator for HGEMM. This is intended to partition the threa...
+
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:40
+
Definition: convert.h:38
+
Definition: matrix_traits.h:43
+
Implements a software-pipelined efficient GEMM.
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
Defines structural properties of the GEMM epilogue.
+
Definition: hgemm_swizzle.h:40
+
Defines conversion operations among Fragments of different base type.
+
Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
Definition: hgemm_traits.h:114
+
Definition: hgemm_traits.h:57
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:620
+
TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
The iterator to store B to shared memory.
Definition: hgemm_traits.h:307
+
+ + + + diff --git a/docs/generated-html/hierarchy.html b/docs/generated-html/hierarchy.html new file mode 100644 index 00000000..25ba6bda --- /dev/null +++ b/docs/generated-html/hierarchy.html @@ -0,0 +1,411 @@ + + + + + + + +Cutlass: Class Hierarchy + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Class Hierarchy
+
+
+
This inheritance list is sorted roughly, but not completely, alphabetically:
+
[detail level 123]
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Ccutlass::platform::aligned_chunk< Align >
 Ccutlass::platform::aligned_storage< Len, Align >Std::aligned_storage
 Ccutlass::AlignedStruct< kAlignment_ >
 Ccutlass::AlignedStruct< kVectorSize >
 Ccutlass::platform::alignment_of< value_t >Std::alignment_of
 Ccutlass::platform::alignment_of< double2 >
 Ccutlass::platform::alignment_of< double4 >
 Ccutlass::platform::alignment_of< float4 >
 Ccutlass::platform::alignment_of< int4 >
 Ccutlass::platform::alignment_of< long4 >
 Ccutlass::platform::alignment_of< longlong2 >
 Ccutlass::platform::alignment_of< longlong4 >
 Ccutlass::platform::alignment_of< uint4 >
 Ccutlass::platform::alignment_of< ulong4 >
 Ccutlass::platform::alignment_of< ulonglong2 >
 Ccutlass::platform::alignment_of< ulonglong4 >
 Ccutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >
 Ccutlass::ComputeOffsetFromShape< Shape_ >Compute the offset for the given coordinates in a cube
 Ccutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
 Ccutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >Compute the offset for the given coordinates in a cube with a depth of 1
 Ccutlass::ComputeOffsetFromStrides< Strides_ >Compute the offset for the given coordinates in a cube
 Ccutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
 Ccutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >Compute the offset for the given coordinates in a cube with a depth of 1
 Ccutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >Decompose threadId.x into coordinate of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_
 Ccutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >Specialization for D=1 and C=1
 Ccutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >Specialization for D=1
 Ccutlass::platform::conditional< B, T, F >Std::conditional (true specialization)
 Ccutlass::platform::conditional< false, T, F >Std::conditional (false specialization)
 Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIteratorA const iterator implementing Predicate Iterator Concept enabling sequential read-only access to predicates
 Ccutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >Adapter to enable random access to predicates via logical coordinate within a tile
 Ccutlass::Convert< InputFragment_, OutputFragment_ >
 Ccutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >
 Ccutlass::Coord< N_ >Statically-sized array specifying Coords within a tensor
 Ccutlass::Coord< 4 >
 Ccutlass::Coord< Rank >
 Ccutlass::Copy< Fragment_ >
 Ccutlass::platform::default_delete< T >Default deleter
 Ccutlass::platform::default_delete< T[]>Partial specialization for deleting array types
 Ccutlass::divide_assert< Dividend, Divisor >
 Ccutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >
 Ccutlass::platform::enable_if< C, T >Std::enable_if (true specialization)
 Ccutlass::platform::enable_if< false, T >Std::enable_if (false specialization)
 Ccutlass::Extent< T >Returns the extent of a scalar or vector
 Ccutlass::Extent< Vector< T, Lanes > >Returns the number of lanes of a vector if need be
 Ccutlass::Extent< Vector< T, Lanes > const >Returns the number of lanes of a vector if need be
 Ccutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
 Ccutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >A template defining Fragment Iterator Concept
 Ccutlass::FragmentLoad< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::gemm::FragmentMultiplyAdd< Scalar_ >
 Ccutlass::gemm::FragmentMultiplyAdd< half >
 Ccutlass::FragmentStore< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::gemm::Gemm< GemmTraits_ >
 Ccutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
 Ccutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
 Ccutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
 Ccutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
 Ccutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
 Ccutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
 Ccutlass::gemm::GemmDesc< Scalar_, Index_ >
 Ccutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
 Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
 Ccutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
 Ccutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
 Ccutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
 Ccutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
 Ccutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
 Ccutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
 Ccutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >
 Ccutlass::GemmOperandGemm operand - D = A * B + C
 Ccutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ >Helper to describe attributes of GEMM matrix operands
 Ccutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
 Ccutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
 Ccutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
 Ccutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >
 Ccutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
 Ccutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
 Ccutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperA< kLayout_, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperB< kLayout_, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
 Ccutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
 Ccutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
 Ccutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
 Ccutlass::gemm::GetExtent< kOperand_, Tile_ >
 Ccutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >
 Ccutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreamAssemble the global load streams for A/B
 Ccutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
 Ccutlass::platform::greater< T >Std::greater
 Ccutlass::gemm::HgemmSwizzle< GlobalIterator_ >
 Ccutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
 Ccutlass::gemm::HgemmTransformerA< kLayout_, Iterator_ >
 Ccutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
 Ccutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
 Ccutlass::gemm::HgemmTransformerB< kLayout_, Iterator_ >
 Ccutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
 Ccutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
 Ccutlass::IdentityDescribes identity elements
 Ccutlass::gemm::IdentityBlockSwizzle
 Ccutlass::gemm::IgemmEpilogueScalar< ScalarD_ >
 Ccutlass::gemm::IgemmEpilogueScalar< int >
 Ccutlass::gemm::IgemmFloatToInt8Converter< kElements_ >
 Ccutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ >
 Ccutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >
 Ccutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ >
 Ccutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >
 Ccutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >
 Ccutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ >
 Ccutlass::gemm::IgemmSwizzle< GlobalIterator_ >
 Ccutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
 Ccutlass::gemm::IgemmTransformerA< kLayout_, Iterator_ >
 Ccutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
 Ccutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
 Ccutlass::gemm::IgemmTransformerB< kLayout_, Iterator_ >
 Ccutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
 Ccutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
 Ccutlass::platform::integral_constant< value_t, V >Std::integral_constant
 Ccutlass::platform::integral_constant< bool, V >
 Ccutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
 Ccutlass::platform::integral_constant< bool,(is_base_of_helper< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)||(is_same< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)>
 Ccutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
 Ccutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
 Ccutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
 Ccutlass::platform::integral_constant< bool,(N &(N - 1))==0 >
 Ccutlass::platform::is_base_of_helper< BaseT, DerivedT >Helper for std::is_base_of
 Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::IteratorAn iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates
 Ccutlass::IteratorAdvanceSpecifies dimension in which post-increment accesses advance
 Ccutlass::IteratorFragmentSpecifies whether iterator storage fragment consists of Scalar values or WMMA matrix
 Ccutlass::platform::less< T >Std::less
 Ccutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >Functor to compute linear combination of fragments
 Ccutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >
 Ccutlass::Load< double, 2, Memory_, true, 16 >
 Ccutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >
 Ccutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >
 Ccutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >
 Ccutlass::log2_down< N, CurrentVal, Count >
 Ccutlass::log2_down< N, 1, Count >
 Ccutlass::log2_up< N, CurrentVal, Count >
 Ccutlass::log2_up< N, 1, Count >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage
 Ccutlass::MatrixLayoutDescribes layouts of matrices
 Ccutlass::MemorySpaceEnum to specify which memory space data resides in
 Ccutlass::platform::nullptr_tStd::nullptr_t
 Ccutlass::platform::alignment_of< value_t >::pad
 Ccutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::ParamsThe params
 CParams
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ParamsThe params
 Ccutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::ParamsThe params
 Ccutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::ParamsParameters to the iterator
 Ccutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::ParamsThe params
 Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::ParamsThe params
 Ccutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::ParamsThe params
 Ccutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::ParamsThe parameters
 Ccutlass::platform::plus< T >Platform::plus
 Ccutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >Adapter to enable random access to predicates via logical coordinate within a tile
 Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >Statically sized array of bits implementing Predicate Vector Concept
 Ccutlass::PredicateVector< Base::Iterations::kW >
 Ccutlass::PredicateVector< ShapeCount< typename Base::Iterations >::kCount >
 Ccutlass::gemm::ProjectOperand< operand, Kstrided >
 Ccutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >Project A operand - (0, K, M)
 Ccutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >Project B operand - (0, K, N)
 Ccutlass::gemm::ProjectOperand< GemmOperand::kC, true >Project C operand - (0, N, M)
 Ccutlass::gemm::ProjectOperand< GemmOperand::kD, true >Project D operand - (0, N, M)
 Ccutlass::platform::remove_const< T >Std::remove_const (non-const specialization)
 Ccutlass::platform::remove_const< const T >Std::remove_const (const specialization)
 Ccutlass::platform::remove_cv< T >Std::remove_cv
 Ccutlass::platform::remove_volatile< T >Std::remove_volatile (non-volatile specialization)
 Ccutlass::platform::remove_volatile< volatile T >Std::remove_volatile (volatile specialization)
 Ccutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >
 Ccutlass::gemm::ReshapeThreads< Tile_, Threads_, true >
 Ccutlass::ReshapeTile< Tile_, kAccessSize_, bool >
 Ccutlass::ReshapeTile< Tile_, kAccessSize_, true >
 Ccutlass::Shape< kD_, kH_, kW_, kC_ >A Shape implementing Layout Concept describing the dimensions of a cube
 Ccutlass::ShapeAdd< A_, B_ >
 Ccutlass::ShapeCount< Shape >Compute derived counts of a Layout Concept based class
 Ccutlass::ShapeDiv< A_, B_ >
 Ccutlass::ShapeMax< A_, B_ >
 Ccutlass::ShapeMin< A_, B_ >
 Ccutlass::ShapeMul< A_, B_ >
 Ccutlass::ShapeScale< A_, kScale_ >
 Ccutlass::ShapeStrides< Shape_ >
 Ccutlass::ShapeSub< A_, B_ >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreamAssemble the shared load stream for A/B
 Ccutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
 Ccutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorageThe shared storage
 Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorageThe shared memory to swizzle the data in the epilogue
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorageThe storage in shared memory
 Ccutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorageThe storage in shared memory needed by that stream
 Ccutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
 Ccutlass::sqrt_est< N >
 Ccutlass::StorageType< kAlignment_ >
 Ccutlass::StorageType< 1 >
 Ccutlass::StorageType< 2 >
 Ccutlass::StorageType< 4 >
 Ccutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >
 Ccutlass::Store< double, 2, Memory_, true, 16 >
 Ccutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >
 Ccutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >
 Ccutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >
 Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorageThe shared memory storage to exchange data
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStreamA, SharedLoadStreamA >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStreamB, SharedLoadStreamB >
 Ccutlass::TensorRef< Storage_, Rank_ >Structure modeling a pointer and stride into a tensor
 Ccutlass::TensorRef< T, 4 >
 Ccutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >Template performing matrix multiply-add operation within a thread
 Ccutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >Template performing matrix multiply-add operation within a thread
 Ccutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >Template performing matrix multiply-add operation within a thread
 Ccutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset
 Ccutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset
 Ccutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::TiledThreadOffset< ThreadShape >Basic thread offset function computed from a thread shape
 Ccutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >Iterator for accessing a stripmined tile in memory
 Ccutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
 Ccutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
 Ccutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >A template defining Tile Traits Concept
 Ccutlass::TileTraitsContiguousMajor< Tile_, Threads >
 Ccutlass::TileTraitsStandard< Tile_, Threads >Chooses 'best' shape to enable warp raking along contiguous dimension if possible
 Ccutlass::TileTraitsStrideMajor< Tile_, Threads >
 Ccutlass::TileTraitsWarpRake< Tile_, Threads >Tiling in which warps rake across the contiguous dimension
 Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIteratorIterator that always returns true
 Ccutlass::TrivialPredicateTileAdapterAlways returns true predicate
 Ccutlass::platform::unique_ptr< T, Deleter >Std::unique_ptr
 Ccutlass::Vector< Scalar_, kLanes_ >
 Ccutlass::Vector< half, kLanes_ >
 Ccutlass::Vectorize< Element_, kLanes_ >
 Ccutlass::Vectorize< Element_, 1 >
 Ccutlass::VectorTraits< T >Traits describing properties of vectors and scalar-as-vectors
 Ccutlass::VectorTraits< Vector< T, Lanes > >Partial specialization for actual cutlass::Vector
 Ccutlass::VectorTraits< Vector< T, Lanes > const >Partial specialization for actual cutlass::Vector
+
+
+ + + + diff --git a/docs/generated-html/identity__block__swizzle_8h.html b/docs/generated-html/identity__block__swizzle_8h.html new file mode 100644 index 00000000..3da48ad4 --- /dev/null +++ b/docs/generated-html/identity__block__swizzle_8h.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: identity_block_swizzle.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
identity_block_swizzle.h File Reference
+
+
+ +

Defines functors for mapping blockIdx to partitions of the GEMM computation. +More...

+ +

Go to the source code of this file.

+ + + + +

+Classes

struct  cutlass::gemm::IdentityBlockSwizzle
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+

Detailed Description

+

Currently, we only implement an identity mapping.

+
+ + + + diff --git a/docs/generated-html/identity__block__swizzle_8h_source.html b/docs/generated-html/identity__block__swizzle_8h_source.html new file mode 100644 index 00000000..fb44c26b --- /dev/null +++ b/docs/generated-html/identity__block__swizzle_8h_source.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: identity_block_swizzle.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
identity_block_swizzle.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
30 #pragma once
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
39  CUTLASS_DEVICE IdentityBlockSwizzle() {}
40 
42  CUTLASS_DEVICE dim3 swizzle() { return blockIdx; }
43 };
44 
46 
47 } // namespace gemm
48 } // namespace cutlass
Definition: convert.h:33
+
CUTLASS_DEVICE IdentityBlockSwizzle()
Ctor.
Definition: identity_block_swizzle.h:39
+
CUTLASS_DEVICE dim3 swizzle()
Swizzle the block index.
Definition: identity_block_swizzle.h:42
+
Definition: identity_block_swizzle.h:37
+
+ + + + diff --git a/docs/generated-html/igemm__epilogue_8h.html b/docs/generated-html/igemm__epilogue_8h.html new file mode 100644 index 00000000..9b5e5ccf --- /dev/null +++ b/docs/generated-html/igemm__epilogue_8h.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: igemm_epilogue.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
igemm_epilogue.h File Reference
+
+ + + + + diff --git a/docs/generated-html/igemm__epilogue_8h_source.html b/docs/generated-html/igemm__epilogue_8h_source.html new file mode 100644 index 00000000..bfef820a --- /dev/null +++ b/docs/generated-html/igemm__epilogue_8h_source.html @@ -0,0 +1,168 @@ + + + + + + + +Cutlass: igemm_epilogue.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
igemm_epilogue.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/convert.h>
32 #include <cutlass/fragment.h>
36 #include <cutlass/reshape_tile.h>
37 #include <cutlass/tile_iterator.h>
38 
39 namespace cutlass {
40 namespace gemm {
41 
43 
44 template <int kElements_>
50 
51  // We are packing 4 floats into int32 registers so we need kElements to be multiple of 4.
52  static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4");
53 
55  CUTLASS_DEVICE IgemmFloatToInt8Converter() {}
56 
58  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
59  transform(src, 0, dst);
60  }
61 
63  template <typename Fragment_>
64  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
65  // The inputs.
66  float4 const* src_f4 = reinterpret_cast<float4 const*>(&src[0]);
67  // The outputs.
68  int* dst_int = reinterpret_cast<int*>(&dst[0]);
69 
70  // Iterate over the floats and pack them together to produce ints.
71  for (int i = 0; i < kElements_ / 4; ++i) {
72  // Read the float4.
73  float4 f4 = src_f4[i];
74 
75  // Clamp the 4 elements of the floats to the [-128, +127] range.
76  float x = fmaxf(-128.f, fminf(127.f, f4.x));
77  float y = fmaxf(-128.f, fminf(127.f, f4.y));
78  float z = fmaxf(-128.f, fminf(127.f, f4.z));
79  float w = fmaxf(-128.f, fminf(127.f, f4.w));
80 
81  // Convert to integers.
82  int ix = (int)x;
83  int iy = (int)y;
84  int iz = (int)z;
85  int iw = (int)w;
86 
87  // Extract the lower bytes to build an int32 with 4 int8.
88  asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(ix) : "r"(iy));
89  asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(iz) : "r"(iw));
90  asm volatile("prmt.b32 %0, %0, %1, 0x5410;" : "+r"(ix) : "r"(iz));
91 
92  // Store the int.
93  dst_int[i] = ix;
94  }
95  }
96 };
97 
99 
100 template <typename InputScalar_, typename OutputFragment_>
103 };
104 
105 template <int kElements_>
106 struct IgemmGlobalStoreTransformer<float, Fragment<int8_t, kElements_> > {
108 };
109 
111 
112 template <int kElements_>
118 
119  // We are unpacking 4 int8s from int32.
120  static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4");
121 
123  CUTLASS_DEVICE IgemmInt8ToFloatConverter() {}
124 
126  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
127  transform(src, 0, dst);
128  }
129 
131  template <typename Fragment_>
132  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
133  // The inputs.
134  int const* src_int = reinterpret_cast<int const*>(&src[0]);
135  // The outputs.
136  float4* dst_f4 = reinterpret_cast<float4*>(&dst[0]);
137 
138  // Iterate over the int8 and unpack them together to produce floats.
139  for (int i = 0; i < kElements_ / 4; ++i) {
140  // Read the int.
141  int ix, iy, iz, iw = src_int[i];
142 
143  // Extract the 4 bytes.
144  asm volatile("prmt.b32 %0, 0x0, %1, 0x4440;" : "=r"(ix) : "r"(iw));
145  asm volatile("prmt.b32 %0, 0x0, %1, 0x4441;" : "=r"(iy) : "r"(iw));
146  asm volatile("prmt.b32 %0, 0x0, %1, 0x4442;" : "=r"(iz) : "r"(iw));
147  asm volatile("prmt.b32 %0, 0x0, %1, 0x4443;" : "=r"(iw) : "r"(iw));
148 
149  // The floats.
150  float fx, fy, fz, fw;
151 
152  // Convert to floats (make sure we generate I2F.F32.S8).
153  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fx) : "r"(ix));
154  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fy) : "r"(iy));
155  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fz) : "r"(iz));
156  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fw) : "r"(iw));
157 
158  // Store the float4.
159  dst_f4[i] = make_float4(fx, fy, fz, fw);
160  }
161  }
162 };
163 
165 
166 template <typename InputFragment_, typename OutputScalar_>
169 };
170 
171 template <int kElements_>
172 struct IgemmGlobalLoadTransformer<Fragment<int8_t, kElements_>, float> {
174 };
175 
177 
178 template <typename InputScalar_, typename OutputFragment_>
181 };
182 
184 
185 template <typename IgemmConfig_, typename EpilogueFunctor_, typename Index_>
187  : public GemmEpilogueTraitsHelper<IgemmConfig_, EpilogueFunctor_, Index_> {
191  typedef IgemmConfig_ IgemmConfig;
192 
194  typedef typename Base::Scalar Scalar;
196  typedef typename Base::Iterations Iterations;
198  typedef typename Base::Delta Delta;
199 
207  typedef
209 
217  typedef
219 
232  SharedStoreFragmentD>::Transformer
242 };
243 
245 
246 template <
248  typename IgemmConfig_,
250  typename EpilogueFunctor_,
252  typename Index_ = int,
256  // The output tile.
257  typename IgemmConfig_::OutputTile,
258  // The accumulators.
259  typename IgemmConfig_::Accumulators,
260  // The global iterator for C.
261  typename Helper_::GlobalLoadIteratorC,
262  // The transformer for C.
263  typename Helper_::GlobalTransformerC,
264  // The transformer for D.
265  typename Helper_::GlobalTransformerD,
266  // The global iterator for D.
267  typename Helper_::GlobalStoreIteratorD,
268  // The iterator to store D to shared memory.
269  typename Helper_::SharedStoreIteratorD,
270  // The shared store transformer for D.
271  typename Helper_::SharedStoreTransformerD,
272  // The iterator to load D from shared memory.
273  typename Helper_::SharedLoadIteratorD,
274  // The iterations.
275  typename Helper_::Iterations,
276  // The strides between iterations.
277  typename Helper_::Delta,
278  // The functor to be used in the epilogue.
279  EpilogueFunctor_,
280  // The index.
281  Index_> {
283  static bool const kInt8Output =
285 };
286 
288 
289 template <typename GemmEpilogueTraits_, bool = GemmEpilogueTraits_::kInt8Output>
290 struct IgemmEpilogue : public GemmEpilogue<GemmEpilogueTraits_> {
293 
295  CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_,
296  typename Base::SharedStorage& shared_storage_,
297  typename Base::Index m_,
298  typename Base::Index n_)
299  : Base(params_, shared_storage_, m_, n_) {}
300 };
301 
303 
304 template <typename GemmEpilogueTraits_>
305 struct IgemmEpilogue<GemmEpilogueTraits_, true> : public GemmEpilogue<GemmEpilogueTraits_> {
308 
310  CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_,
311  typename Base::SharedStorage& shared_storage_,
312  typename Base::Index m_,
313  typename Base::Index n_)
314  : Base(params_, shared_storage_, m_, n_) {}
315 };
316 
318 
319 } // namespace gemm
320 } // namespace cutlass
Definition: gemm_global_tile.h:116
+
Definition: igemm_epilogue.h:255
+
Definition: load_store.h:42
+
Base::Delta Delta
The strides between iterations.
Definition: igemm_epilogue.h:198
+
Base::Fragment Fragment
Fragment definition.
Definition: tile_iterator.h:682
+
Base::SharedStoreTileTraits SharedStoreTileTraits
The traits class for the shared iterator to store D to shared memory.
Definition: igemm_epilogue.h:221
+
IgemmGlobalStoreTransformer< Scalar, GlobalFragmentD >::Transformer GlobalTransformerD
The transformer from accumulators to shared memory fragments.
Definition: igemm_epilogue.h:218
+
Definition: convert.h:33
+
Base::SharedLoadTileTraits SharedLoadTileTraits
The traits class for the shared iterator to load D from shared memory.
Definition: igemm_epilogue.h:235
+
TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorD
The shared iterator to load D from shared memory.
Definition: igemm_epilogue.h:241
+
Definition: gemm_epilogue_traits.h:171
+
GemmEpilogue< GemmEpilogueTraits_ > Base
The base class.
Definition: igemm_epilogue.h:292
+
Traits::Params Params
The params.
Definition: gemm_epilogue.h:57
+
Definition: gemm_epilogue.h:53
+
Definition: igemm_epilogue.h:167
+
std::is_same (false specialization)
Definition: platform.h:412
+
Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
+
CUTLASS_DEVICE IgemmInt8ToFloatConverter()
Ctor.
Definition: igemm_epilogue.h:123
+
SharedStoreIteratorD::Fragment SharedStoreFragmentD
The fragment that needs to be passed to that store iterator.
Definition: igemm_epilogue.h:229
+
EpilogueFunctor_::Scalar Scalar
The scalar.
Definition: gemm_epilogue_traits.h:173
+
Definition: igemm_epilogue.h:186
+
Definition: load_store.h:43
+
Fragment< int8_t, kElements_ > InputFragment
The input fragment.
Definition: igemm_epilogue.h:115
+
Definition: igemm_epilogue.h:290
+
Definition: igemm_epilogue.h:45
+
CUTLASS_DEVICE void transform(Fragment_ const &src, int offset, OutputFragment &dst)
Transform a fragment.
Definition: igemm_epilogue.h:64
+
Traits::SharedStorage SharedStorage
The shared storage.
Definition: gemm_epilogue.h:59
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
Definition: tile_iterator.h:62
+
CUTLASS_DEVICE void transform(InputFragment const &src, OutputFragment &dst)
Transform a fragment.
Definition: igemm_epilogue.h:126
+
Base::Scalar Scalar
The scalar type of the epilogue.
Definition: igemm_epilogue.h:194
+
GlobalLoadIteratorC::Fragment GlobalFragmentC
The fragment that needs to be produced by the load iterator.
Definition: igemm_epilogue.h:205
+
CUTLASS_DEVICE void transform(InputFragment const &src, OutputFragment &dst)
Transform a fragment.
Definition: igemm_epilogue.h:58
+
Fragment< int8_t, kElements_ > OutputFragment
The output fragment.
Definition: igemm_epilogue.h:49
+
GemmGlobalIteratorCd< GlobalStoreTileTraits > GlobalStoreIteratorD
The iterator to store D to global memory.
Definition: igemm_epilogue.h:213
+
IgemmSharedStoreTransformer< typename IgemmConfig::Accumulators::Element, SharedStoreFragmentD >::Transformer SharedStoreTransformerD
The transformer from accumulators to shared memory fragments.
Definition: igemm_epilogue.h:233
+
static bool const kInt8Output
Do we output in int8?
Definition: igemm_epilogue.h:283
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+
Convert< Fragment< InputScalar_, OutputFragment_::kElements >, OutputFragment_ > Transformer
Definition: igemm_epilogue.h:180
+
GemmEpilogue< GemmEpilogueTraits_ > Base
The base class.
Definition: igemm_epilogue.h:307
+
Defines a type for restructuring a tile.
+
Base::GlobalLoadTileTraits GlobalLoadTileTraits
The traits class for the iterator.
Definition: igemm_epilogue.h:201
+
Fragment< float, kElements_ > OutputFragment
The output fragment.
Definition: igemm_epilogue.h:117
+
GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > Base
The base class.
Definition: igemm_epilogue.h:189
+
CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)
Ctor.
Definition: igemm_epilogue.h:295
+
Definition: gemm_shared_tile.h:335
+
Traits::Index Index
The index.
Definition: gemm_epilogue.h:93
+
GlobalStoreIteratorD::Fragment GlobalFragmentD
The fragment that needs to be passed to that store iterator.
Definition: igemm_epilogue.h:215
+
GemmGlobalIteratorCd< GlobalLoadTileTraits > GlobalLoadIteratorC
The iterator to load C from global memory.
Definition: igemm_epilogue.h:203
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
IgemmConfig_ IgemmConfig
The config.
Definition: igemm_epilogue.h:191
+
CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)
Ctor.
Definition: igemm_epilogue.h:310
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
CUTLASS_DEVICE IgemmFloatToInt8Converter()
Ctor.
Definition: igemm_epilogue.h:55
+
Element_ Element
The element.
Definition: fragment.h:108
+
Fragment< float, kElements_ > InputFragment
The input fragment.
Definition: igemm_epilogue.h:47
+
Definition: gemm_epilogue_traits.h:70
+
Definition: gemm_global_tile.h:348
+
Definition: igemm_epilogue.h:179
+
Implements efficient loading of the thread block-level tile from global memory and storing to shared ...
+
Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
The fragment.
Definition: tile_iterator.h:154
+
Definition: convert.h:38
+
IgemmFloatToInt8Converter< kElements_ > Transformer
Definition: igemm_epilogue.h:107
+
Base::Iterations Iterations
The iterations.
Definition: igemm_epilogue.h:196
+
IgemmGlobalLoadTransformer< GlobalFragmentC, Scalar >::Transformer GlobalTransformerC
The transformer from loaded data to math fragment.
Definition: igemm_epilogue.h:208
+
Base::GlobalStoreTileTraits GlobalStoreTileTraits
The traits class for the iterator.
Definition: igemm_epilogue.h:211
+
Convert< InputFragment_, Fragment< OutputScalar_, InputFragment_::kElements > > Transformer
Definition: igemm_epilogue.h:168
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:266
+
CUTLASS_DEVICE void transform(Fragment_ const &src, int offset, OutputFragment &dst)
Transform a fragment.
Definition: igemm_epilogue.h:132
+
Convert< Fragment< InputScalar_, OutputFragment_::kElements >, OutputFragment_ > Transformer
Definition: igemm_epilogue.h:102
+
Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEM...
+
Definition: igemm_epilogue.h:101
+
TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal > SharedStoreIteratorD
The shared iterator to store D to shared memory.
Definition: igemm_epilogue.h:227
+
IgemmInt8ToFloatConverter< kElements_ > Transformer
Definition: igemm_epilogue.h:173
+
Defines conversion operations among Fragments of different base type.
+
Definition: igemm_epilogue.h:113
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:337
+
Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load ea...
+
Definition: gemm_shared_tile.h:264
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:620
+
+ + + + diff --git a/docs/generated-html/igemm__global__tile_8h.html b/docs/generated-html/igemm__global__tile_8h.html new file mode 100644 index 00000000..d6a68016 --- /dev/null +++ b/docs/generated-html/igemm__global__tile_8h.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: igemm_global_tile.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
igemm_global_tile.h File Reference
+
+
+ +

Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each. Applies permute transformation to construct 'interleaved K-strided' data layout in which 4-element dot products from the same K index are arranged in consecutive locations within shared memory. +More...

+ +

Go to the source code of this file.

+ + + + + + + +

+Classes

struct  cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
 
struct  cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+

Detailed Description

+

Supports efficient loads from shared memory to target the DP4A instruction.

+
+ + + + diff --git a/docs/generated-html/igemm__global__tile_8h_source.html b/docs/generated-html/igemm__global__tile_8h_source.html new file mode 100644 index 00000000..df086169 --- /dev/null +++ b/docs/generated-html/igemm__global__tile_8h_source.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: igemm_global_tile.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
igemm_global_tile.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
33 #pragma once
34 
35 #include <cutlass/coord.h>
37 #include <cutlass/matrix_traits.h>
38 
39 namespace cutlass {
40 namespace gemm {
41 
43 
44 template <GemmOperand::Kind kOperand_,
45  MatrixLayout::Kind kLayout_,
46  typename Scalar_,
47  typename Tile_,
48  typename Threads_,
49  int kAccessSize_>
51  // Which GEMM operand?
52  kOperand_,
53  // The layout.
54  kLayout_,
55  // The scalar.
56  Scalar_,
57  // The tile.
58  Tile_,
59  // The threads.
60  Threads_,
61  // The number of scalars per LDG/STG.
62  kAccessSize_> {
66  typedef typename Base::Threads Threads;
70  typedef Shape<Base::Tile::kH / Base::Threads::kH / 4,
71  4,
72  Base::Tile::kW / Base::Threads::kW,
73  Base::Tile::kC / Base::kAccessSize>
75 
77  struct ThreadOffset {
79  Coord<4> operator()() const {
80  int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH;
81  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
82 
83  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
84  }
85  };
86 
87  public:
90 };
91 
93 
94 } // namespace gemm
95 } // namespace cutlass
Computes the thread offset in (H, W) based on thread ID.
Definition: igemm_global_tile.h:77
+
Definition: convert.h:33
+
Defines iterators for efficiently loading and storing to global memory.
+
Definition: gemm_global_tile.h:70
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
Shape< Base::Threads::kH *4, 1, Base::Threads::kW, Base::kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: igemm_global_tile.h:68
+
static int const kH
The height of the cube.
Definition: shape.h:68
+
GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Base
The base class.
Definition: igemm_global_tile.h:64
+
Shape< Base::Tile::kH/Base::Threads::kH/4, 4, Base::Tile::kW/Base::Threads::kW, Base::Tile::kC/Base::kAccessSize > Iterations
The number of iterations needed to load/store the tile.
Definition: igemm_global_tile.h:74
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
Definition: igemm_global_tile.h:50
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+ +
static int const kW
The width of the cube.
Definition: shape.h:70
+
Kind
Definition: matrix_traits.h:36
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_global_tile.h:80
+
Kind
Definition: matrix_traits.h:43
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
Shape< 1, 4, Base::Tile::kC > ThreadsDelta
The threads strides.
Definition: igemm_global_tile.h:89
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: igemm_global_tile.h:79
+
Base::Threads Threads
The threads.
Definition: igemm_global_tile.h:66
+
+ + + + diff --git a/docs/generated-html/igemm__multiply__add_8h.html b/docs/generated-html/igemm__multiply__add_8h.html new file mode 100644 index 00000000..266cb5f1 --- /dev/null +++ b/docs/generated-html/igemm__multiply__add_8h.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: igemm_multiply_add.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
igemm_multiply_add.h File Reference
+
+
+ +

Implements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction. +More...

+ +

Go to the source code of this file.

+ + + + + +

+Classes

struct  cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
 Template performing matrix multiply-add operation within a thread. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/igemm__multiply__add_8h_source.html b/docs/generated-html/igemm__multiply__add_8h_source.html new file mode 100644 index 00000000..414c2ce1 --- /dev/null +++ b/docs/generated-html/igemm__multiply__add_8h_source.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: igemm_multiply_add.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
igemm_multiply_add.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/fragment.h>
32 
34 
35 namespace cutlass {
36 namespace gemm {
37 
39 
41 template <typename AccumulatorsPerThread_, typename ThreadsPerWarp_>
42 struct ThreadMultiplyAdd<AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int> {
46  typedef AccumulatorsPerThread_ AccumulatorsPerThread;
48  typedef ThreadsPerWarp_ ThreadsPerWarp;
52  typedef int8_t ScalarA;
56  typedef int8_t ScalarB;
60  typedef int ScalarC;
63 
65  CUTLASS_DEVICE ThreadMultiplyAdd() {}
66 
68  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
69  FragmentB const& b,
70  Accumulators const& c,
71  Accumulators& d) {
72  // The inputs.
73  int const* a_int = reinterpret_cast<int const*>(&a[0]);
74  int const* b_int = reinterpret_cast<int const*>(&b[0]);
75 
76  for (int j = 0; j < AccumulatorsPerThread::kH; ++j) {
77  for (int i = 0; i < AccumulatorsPerThread::kW; ++i) {
78  asm volatile("dp4a.s32.s32 %0, %1, %2, %3;"
79  : "=r"(d[j * AccumulatorsPerThread::kW + i])
80  : "r"(a_int[i]), "r"(b_int[j]), "r"(c[j * AccumulatorsPerThread::kW + i]));
81  }
82  }
83  }
84 };
85 
87 
88 } // namespace gemm
89 } // namespace cutlass
+
Definition: convert.h:33
+
Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
Definition: shape.h:119
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
Template implementing matrix multiply-add operations on fragments.
+
Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
The accumulators.
Definition: igemm_multiply_add.h:62
+
ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
The number of accumulators per warp.
Definition: igemm_multiply_add.h:50
+
Fragment< ScalarB, AccumulatorsPerThread::kH *4 > FragmentB
The fragment for B.
Definition: igemm_multiply_add.h:58
+ +
Shape< 4, 1, 1 > InstructionShape
The shape of the instruction.
Definition: igemm_multiply_add.h:44
+
ThreadsPerWarp_ ThreadsPerWarp
The number of threads per warp.
Definition: igemm_multiply_add.h:48
+
AccumulatorsPerThread_ AccumulatorsPerThread
The number of accumulators per thread.
Definition: igemm_multiply_add.h:46
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Template performing matrix multiply-add operation within a thread.
Definition: thread_multiply_add.h:43
+
Fragment< ScalarA, AccumulatorsPerThread::kW *4 > FragmentA
The fragment for A.
Definition: igemm_multiply_add.h:54
+ + +
CUTLASS_DEVICE void multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
Multiply : d = a*b + c.
Definition: igemm_multiply_add.h:68
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
+ + + + diff --git a/docs/generated-html/igemm__swizzle_8h.html b/docs/generated-html/igemm__swizzle_8h.html new file mode 100644 index 00000000..a631d215 --- /dev/null +++ b/docs/generated-html/igemm__swizzle_8h.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: igemm_swizzle.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
igemm_swizzle.h File Reference
+
+
+ +

Transposes a fragment of data containing packed 8-bit integer elements. +More...

+
#include <cutlass/fragment.h>
+
+

Go to the source code of this file.

+ + + + +

+Classes

struct  cutlass::gemm::IgemmSwizzle< GlobalIterator_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/igemm__swizzle_8h_source.html b/docs/generated-html/igemm__swizzle_8h_source.html new file mode 100644 index 00000000..93990830 --- /dev/null +++ b/docs/generated-html/igemm__swizzle_8h_source.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: igemm_swizzle.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
igemm_swizzle.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/fragment.h>
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template <typename GlobalIterator_>
38 struct IgemmSwizzle {
40  typedef GlobalIterator_ GlobalIterator;
42  typedef typename GlobalIterator::Fragment Fragment;
44  typedef typename GlobalIterator::FragmentShape FragmentShape;
45 
50 
53 
55  static_assert(FragmentShape::kH % 4 == 0 && ShapeCount<FragmentShape>::kWc % 4 == 0,
56  "Not multiple of 4");
57 
59  CUTLASS_DEVICE IgemmSwizzle() {}
60 
62  CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) {
63  // Expose src/dst as int arrays.
64  int const* src_int = reinterpret_cast<int const*>(&src[0]);
65  int* dst_int = reinterpret_cast<int*>(&dst[0]);
66 
67  // Transpose the data.
68  for (int d = 0; d < FragmentShape::kD; ++d) {
69  for (int h = 0; h < FragmentShape::kH / 4; ++h) {
70  for (int w = 0; w < ShapeCount<FragmentShape>::kWc / 4; ++w) {
71  int const i0 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
72  (4 * h + 0) * (ShapeCount<FragmentShape>::kWc / 4) + w;
73  int const i1 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
74  (4 * h + 1) * (ShapeCount<FragmentShape>::kWc / 4) + w;
75  int const i2 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
76  (4 * h + 2) * (ShapeCount<FragmentShape>::kWc / 4) + w;
77  int const i3 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
78  (4 * h + 3) * (ShapeCount<FragmentShape>::kWc / 4) + w;
79 
80  int a0 = src_int[i0];
81  int a1 = src_int[i1];
82  int a2 = src_int[i2];
83  int a3 = src_int[i3];
84 
85  int b0, b1, b2, b3, c0;
86  asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(b0) : "r"(a0), "r"(a1));
87  asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(c0) : "r"(a2), "r"(a3));
88  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(b0), "r"(c0));
89 
90  asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(b1) : "r"(a0), "r"(a1));
91  asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(c0) : "r"(a2), "r"(a3));
92  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b1) : "r"(b1), "r"(c0));
93 
94  asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(b2) : "r"(a0), "r"(a1));
95  asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(c0) : "r"(a2), "r"(a3));
96  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b2) : "r"(b2), "r"(c0));
97 
98  asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(b3) : "r"(a0), "r"(a1));
99  asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(c0) : "r"(a2), "r"(a3));
100  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b3) : "r"(b3), "r"(c0));
101 
102  dst_int[i0] = b0;
103  dst_int[i1] = b1;
104  dst_int[i2] = b2;
105  dst_int[i3] = b3;
106  }
107  }
108  }
109  }
110 };
111 
113 
114 } // namespace gemm
115 } // namespace cutlass
Definition: convert.h:33
+
std::is_same (false specialization)
Definition: platform.h:412
+
GlobalIterator::FragmentShape FragmentShape
The shape of the source fragment.
Definition: igemm_swizzle.h:44
+
Definition: igemm_swizzle.h:38
+
GlobalIterator_ GlobalIterator
The global iterator.
Definition: igemm_swizzle.h:40
+
CUTLASS_DEVICE void transform(Fragment const &src, Fragment &dst)
Transform a fragment.
Definition: igemm_swizzle.h:62
+
Fragment OutputFragment
The destination fragment.
Definition: igemm_swizzle.h:49
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Fragment InputFragment
The source fragment.
Definition: igemm_swizzle.h:47
+
GlobalIterator::Fragment Fragment
The source fragment.
Definition: igemm_swizzle.h:42
+
CUTLASS_DEVICE IgemmSwizzle()
The src/dst must be int8 fragments.
Definition: igemm_swizzle.h:59
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
Compute derived counts of a Layout Concept based class.
Definition: shape.h:79
+
+ + + + diff --git a/docs/generated-html/igemm__traits_8h.html b/docs/generated-html/igemm__traits_8h.html new file mode 100644 index 00000000..32d14d87 --- /dev/null +++ b/docs/generated-html/igemm__traits_8h.html @@ -0,0 +1,150 @@ + + + + + + + +Cutlass: igemm_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
igemm_traits.h File Reference
+
+
+ +

Defines structural properties of mixed-precision integer GEMM. Multiplicands are assumed to be packed 8-bit integers, accumulators are assumed to be 32-bit signed integers, and output formats vary. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ >
 
struct  cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >
 
struct  cutlass::gemm::IgemmTileTraitsHelperA< kLayout_, GemmConfig_ >
 
struct  cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
 
struct  cutlass::gemm::IgemmTileTraitsHelperB< kLayout_, GemmConfig_ >
 
struct  cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
 
struct  cutlass::gemm::IgemmTransformerA< kLayout_, Iterator_ >
 
struct  cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
 
struct  cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
 
struct  cutlass::gemm::IgemmTransformerB< kLayout_, Iterator_ >
 
struct  cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
 
struct  cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
 
struct  cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
 
struct  cutlass::gemm::IgemmEpilogueScalar< ScalarD_ >
 
struct  cutlass::gemm::IgemmEpilogueScalar< int >
 
struct  cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/igemm__traits_8h_source.html b/docs/generated-html/igemm__traits_8h_source.html new file mode 100644 index 00000000..ecdd4f1d --- /dev/null +++ b/docs/generated-html/igemm__traits_8h_source.html @@ -0,0 +1,166 @@ + + + + + + + +Cutlass: igemm_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
igemm_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
30 #pragma once
31 
32 #include <cutlass/convert.h>
33 #include <cutlass/gemm/gemm.h>
43 #include <cutlass/reshape_tile.h>
44 
45 namespace cutlass {
46 namespace gemm {
47 
49 
50 template <
52  typename OutputTile_,
54  typename ScalarD_,
56  typename AccumulatorsPerThread_>
58  : public GemmConfig<
60  int8_t,
62  int8_t,
64  ScalarD_,
66  ScalarD_,
68  OutputTile_,
70  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, int8_t, int8_t, int>,
72  4,
74  4,
76  16,
78  4,
80  4,
82  16,
84  1,
86  4,
88  1,
90  2> {};
91 
93 
94 template <typename OutputTile_, typename AccumulatorsPerThread_>
95 struct IgemmConfig<OutputTile_, int8_t, AccumulatorsPerThread_>
96  : public GemmConfig<
98  int8_t,
100  int8_t,
102  int8_t,
104  int8_t,
106  OutputTile_,
108  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, int8_t, int8_t, int>,
110  4,
112  4,
114  16,
116  4,
118  4,
120  16,
122  4,
124  4,
126  4,
128  2> {};
129 
131 
132 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
133 struct IgemmTileTraitsHelperA : public GemmTileTraitsHelperA<kLayout_, GemmConfig_> {};
134 
136 
137 template <typename GemmConfig_>
138 struct IgemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_>
139  : public GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> {
142 
144  static int const kScalarsPerStsA = 16;
145 
149  // The layout.
151  // The pointer is float const.
152  int8_t const,
153  // The tile has size KxM in GEMM's terminology.
155  // The threads are distributed as warps x 32 (the traits may reorganize).
157  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
158  4>
160 
163  // The pointer is float.
164  int8_t,
165  // The tile has size KxM in GEMM's terminology.
166  Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kW * 4>,
167  // The threads are distributed as warps x 32 (the traits may reorganize).
168  typename GlobalTileTraits::Threads,
169  // The number of scalars per STS (STS.32 or STS.128, etc).
170  kScalarsPerStsA>
172 };
173 
175 
176 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
177 struct IgemmTileTraitsHelperB : public GemmTileTraitsHelperB<kLayout_, GemmConfig_> {};
178 
180 
181 template <typename GemmConfig_>
182 struct IgemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_>
183  : public GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> {
186 
188  static int const kScalarsPerStsB = 16;
189 
193  // The layout.
195  // The pointer is float const.
196  int8_t const,
197  // The tile has size KxM in GEMM's terminology.
199  // The threads are distributed as warps x 32 (the traits may reorganize).
201  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
202  4>
204 
207  // The pointer is float.
208  int8_t,
209  // The tile has size KxM in GEMM's terminology.
210  Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kH * 4>,
211  // The threads are distributed as warps x 32 (the traits may reorganize).
212  typename GlobalTileTraits::Threads,
213  // The number of scalars per STS (STS.32 or STS.128, etc).
214  kScalarsPerStsB>
216 };
217 
219 
220 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
222 
223 template <typename Iterator_>
224 struct IgemmTransformerA<MatrixLayout::kRowMajor, Iterator_> {
226 };
227 
228 template <typename Iterator_>
229 struct IgemmTransformerA<MatrixLayout::kColumnMajor, Iterator_> {
231 };
232 
234 
235 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
237 
238 template <typename Iterator_>
239 struct IgemmTransformerB<MatrixLayout::kColumnMajor, Iterator_> {
241 };
242 
243 template <typename Iterator_>
244 struct IgemmTransformerB<MatrixLayout::kRowMajor, Iterator_> {
246 };
247 
249 
250 template <
252  MatrixLayout::Kind kLayoutA_,
254  MatrixLayout::Kind kLayoutB_,
256  typename OutputTile_,
258  typename ScalarD_,
260  typename EpilogueFunctor_,
262  typename AccumulatorsPerThread_ = Shape<32, 8, 8>,
264  typename Index_ = int>
272 
277  typedef typename IgemmTransformerA<GemmTileTraitsHelperA::kLayout,
280  typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits,
281  typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar,
288 
292  // The default transformer for B.
293  typedef typename IgemmTransformerB<GemmTileTraitsHelperB::kLayout,
296  typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits,
297  typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar,
304 
306  typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits,
307  typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar,
315  typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits,
316  typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar,
323 
328 
331 };
332 
334 
335 template <typename ScalarD_>
337  typedef float Scalar;
338 };
339 
340 template <>
341 struct IgemmEpilogueScalar<int> {
342  typedef int Scalar;
343 };
344 
346 
347 template <
349  MatrixLayout::Kind kLayoutA_,
351  MatrixLayout::Kind kLayoutB_,
353  typename OutputTile_ = Shape<32, 128, 128>,
355  typename ScalarD_ = int,
359  typename AccumulatorsPerThread_ = Shape<32, 8, 8>,
361  typename Index_ = int,
363  typename Helper_ = IgemmTraitsHelper<kLayoutA_,
364  kLayoutB_,
365  OutputTile_,
366  ScalarD_,
367  EpilogueFunctor_,
368  AccumulatorsPerThread_,
369  Index_> >
370 struct IgemmTraits : public GemmTraits<
371  // The config.
372  typename Helper_::GemmConfig,
373  // The stream to load A from global memory to shared memory.
374  typename Helper_::GlobalLoadStreamA,
375  // The stream to load B from global memory to shared memory.
376  typename Helper_::GlobalLoadStreamB,
377  // The stream to load A from shared memory.
378  typename Helper_::SharedLoadStreamA,
379  // The stream to load B from shared memory.
380  typename Helper_::SharedLoadStreamB,
381  // The epilogue.
382  typename Helper_::Epilogue,
383  // The block swizzle to reorganize the grid.
384  IdentityBlockSwizzle,
385  // The index.
386  Index_,
387  // The tool used to clear accumulators.
388  typename Helper_::ClearAccumulators> {};
389 
391 
392 } // namespace gemm
393 } // namespace cutlass
Definition: load_store.h:42
+
TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
The iterator to load B from shared memory.
Definition: igemm_traits.h:319
+
Definition: convert.h:33
+
IgemmSwizzle< Iterator_ > Transformer
Definition: igemm_traits.h:230
+
Defines iterators for efficiently loading and storing to global memory.
+
GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
The iterator to load A from global memory.
Definition: igemm_traits.h:275
+
Transposes a fragment of data containing packed 8-bit integer elements.
+
Copy< typename Iterator_::Fragment > Transformer
Definition: igemm_traits.h:240
+
Defines structural properties of complete GEMM computation.
+
GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
The stream to load B from global memory to shared memory.
Definition: igemm_traits.h:303
+
Definition: igemm_traits.h:133
+
TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
The iterator to store B to shared memory.
Definition: igemm_traits.h:300
+
IgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
Definition: igemm_traits.h:294
+
Definition: igemm_epilogue.h:290
+
IgemmContiguousGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
The traits class to build the iterator to load data from global memory for B^T.
Definition: igemm_traits.h:203
+
Definition: convert.h:69
+
GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Base
The base config.
Definition: igemm_traits.h:141
+
IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > GemmConfig
The IGEMM config.
Definition: igemm_traits.h:267
+
Definition: gemm_shared_tile.h:38
+
Definition: tile_iterator.h:62
+
Implements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction.
+
Definition: gemm_global_tile.h:159
+
GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kH *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsB > SharedStoreTileTraits
The traits class to build the iterator to store data to shared memory for B^N.
Definition: igemm_traits.h:215
+
Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
+
Definition: gemm_global_stream.h:161
+
Definition: gemm_traits.h:273
+
GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
The iterator to load B from global memory.
Definition: igemm_traits.h:291
+
IgemmContiguousGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
The traits class to build the iterator to load data from global memory for A^N.
Definition: igemm_traits.h:159
+
int Scalar
Definition: igemm_traits.h:342
+
IgemmSwizzle< Iterator_ > Transformer
Definition: igemm_traits.h:245
+
Describes layouts of matrices.
Definition: matrix_traits.h:35
+
IgemmTileTraitsHelperB< kLayoutB_, GemmConfig > GemmTileTraitsHelperB
The GEMM config for B.
Definition: igemm_traits.h:271
+
Definition: igemm_swizzle.h:38
+
Definition: igemm_traits.h:177
+
Definition: igemm_traits.h:265
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+
GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
The stream to load A from global memory to shared memory.
Definition: igemm_traits.h:287
+
SharedLoadStream< SharedLoadIteratorB, Copy< typename SharedLoadIteratorB::Fragment > > SharedLoadStreamB
The stream to load B from shared memory.
Definition: igemm_traits.h:322
+
Defines iterators for efficiently loading and storing tiles to and from shared memory.
+
Definition: matrix_traits.h:36
+
IgemmTileTraitsHelperA< kLayoutA_, GemmConfig > GemmTileTraitsHelperA
The GEMM config for A.
Definition: igemm_traits.h:269
+
Definition: gemm_shared_stream.h:44
+
Defines a type for restructuring a tile.
+
TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
The iterator to load A from shared memory.
Definition: igemm_traits.h:310
+
ClearAccumulators< typename MultiplyAdd::ScalarC > ClearAccumulators
The object to clear accumulators.
Definition: igemm_traits.h:327
+
Definition: gemm_traits.h:79
+
Definition: gemm_traits.h:137
+
Definition: matrix_traits.h:43
+
Definition: igemm_traits.h:57
+
Definition: igemm_traits.h:221
+
Definition: igemm_global_tile.h:50
+
float Scalar
Definition: igemm_traits.h:337
+
Definition: gemm_traits.h:428
+
Copy< typename Iterator_::Fragment > Transformer
Definition: igemm_traits.h:225
+
Definition: igemm_traits.h:370
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kW *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsA > SharedStoreTileTraits
The traits class to build the iterator to store data to shared memory for A^N.
Definition: igemm_traits.h:171
+ +
Template performing matrix multiply-add operation within a thread.
Definition: thread_multiply_add.h:43
+
Definition: matrix_traits.h:36
+ +
IgemmEpilogue< IgemmEpilogueTraits< GemmConfig, EpilogueFunctor_ > > Epilogue
The epilogue.
Definition: igemm_traits.h:330
+
IgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
The default transformer for A.
Definition: igemm_traits.h:278
+
Kind
Definition: matrix_traits.h:36
+
Definition: igemm_traits.h:236
+
TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
The iterator to store A to shared memory.
Definition: igemm_traits.h:284
+
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:40
+
Definition: matrix_traits.h:43
+
Implements a software-pipelined efficient GEMM.
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
Defines structural properties of the GEMM epilogue.
+
Definition: igemm_traits.h:336
+
Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point o...
+
Defines conversion operations among Fragments of different base type.
+
GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Base
The base config.
Definition: igemm_traits.h:185
+
SharedLoadStream< SharedLoadIteratorA, Copy< typename SharedLoadIteratorA::Fragment > > SharedLoadStreamA
The stream to load A from shared memory.
Definition: igemm_traits.h:313
+
Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load ea...
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:620
+
GemmConfig::MultiplyAdd MultiplyAdd
The multiply-add functor.
Definition: igemm_traits.h:325
+
+ + + + diff --git a/docs/generated-html/index.html b/docs/generated-html/index.html new file mode 100644 index 00000000..f2ba6899 --- /dev/null +++ b/docs/generated-html/index.html @@ -0,0 +1,83 @@ + + + + + + + +Cutlass: Main Page + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Cutlass Documentation
+
+
+
+ + + + diff --git a/docs/generated-html/iterator__access_8h.html b/docs/generated-html/iterator__access_8h.html new file mode 100644 index 00000000..cc41cd5a --- /dev/null +++ b/docs/generated-html/iterator__access_8h.html @@ -0,0 +1,175 @@ + + + + + + + +Cutlass: iterator_access.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
iterator_access.h File Reference
+
+
+ +

Free functions for loading and storing to implementations of tile iterator concepts. +More...

+ +

Go to the source code of this file.

+ + + + +

+Namespaces

 cutlass
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Functions

template<typename InputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator &iterator, Fragment &fragment)
 Loads a fragment from an input iterator. More...
 
template<typename InputIterator , typename Fragment >
CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator &iterator, Fragment &fragment)
 Loads a fragment from a shared memory input iterator. More...
 
template<typename InputIterator , typename Fragment >
CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator &iterator, Fragment &fragment, int d)
 Loads a fragment from a shared memory input iterator. More...
 
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
 Loads a fragment from an input iterator, masked by a predicate iterator. More...
 
template<typename InputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
 Loads a fragment from an input iterator. More...
 
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
 Loads a fragment from an input iterator. More...
 
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
 
template<typename InputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
 Loads a fragment from an input iterator. More...
 
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
 Loads a fragment from an input iterator. More...
 
template<typename OutputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator &iterator, Fragment &fragment)
 Stores a fragment to an output iterator. More...
 
template<typename OutputIterator , typename Fragment >
CUTLASS_DEVICE void cutlass::shared_iterator_store (OutputIterator &iterator, Fragment const &fragment)
 Stores a fragment to a shared memory output iterator. More...
 
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
 Stores a fragment to an output iterator, masked by a predicate iterator. More...
 
template<typename OutputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
 Stores a fragment to an output iterator. More...
 
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
 Stores a fragment to an output iterator. More...
 
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
 Stores a fragment to an output iterator, masked by a predicate iterator. More...
 
template<typename OutputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
 Stores a fragment to an output iterator. More...
 
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
 Stores a fragment to an output iterator. More...
 
+
+ + + + diff --git a/docs/generated-html/iterator__access_8h_source.html b/docs/generated-html/iterator__access_8h_source.html new file mode 100644 index 00000000..11289a93 --- /dev/null +++ b/docs/generated-html/iterator__access_8h_source.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: iterator_access.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
iterator_access.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
31 #include <cutlass/load_store.h>
33 #include <cutlass/shape.h>
34 
35 namespace cutlass {
36 
38 
40 template <typename InputIterator, typename Fragment>
41 CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment) {
42  typename InputIterator::FragmentIterator frag_iterator(fragment);
43  for (int d = 0; d < InputIterator::Iterations::kD; ++d) {
44  for (int h = 0; h < InputIterator::Iterations::kH; ++h) {
45  for (int w = 0; w < InputIterator::Iterations::kW; ++w) {
46  for (int c = 0; c < InputIterator::Iterations::kC; ++c) {
47  if (iterator.valid(d, h, w, c)) {
48  int const offset =
50  0, 0, w, c);
52  load(reinterpret_cast<typename InputIterator::AccessType &>(
53  frag_iterator.at(d, h, w, c)),
54  iterator.data(),
55  offset);
56  }
57  }
58  if (w < InputIterator::Iterations::kW - 1) {
59  iterator.inc_w();
60  }
61  }
62  if (h < InputIterator::Iterations::kH - 1) {
63  iterator.inc_h();
64  }
65  }
66  if (d < InputIterator::Iterations::kD - 1) {
67  iterator.inc_d();
68  }
69  }
70  iterator.inc_advance();
71 }
72 
74 template <typename InputIterator, typename Fragment>
75 CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment) {
76  typename InputIterator::FragmentIterator frag_iterator(fragment);
77  for (int d = 0; d < InputIterator::Iterations::kD; ++d) {
78  for (int h = 0; h < InputIterator::Iterations::kH; ++h) {
79  for (int w = 0; w < InputIterator::Iterations::kW; ++w) {
80  for (int c = 0; c < InputIterator::Iterations::kC; ++c) {
81  int const offset =
83  d, h, w, c);
84 
85  FragmentLoad<InputIterator::kIteratorFragment,
86  InputIterator::Tile::kC,
87  typename InputIterator::Scalar,
88  InputIterator::kMemorySpace,
89  typename InputIterator::FragmentElement,
90  InputIterator::Tile::kW>::load(frag_iterator.at(d, h, w, c),
91  iterator.data(),
92  offset);
93  }
94  }
95  }
96  }
97 }
98 
100 template <typename InputIterator, typename Fragment>
101 CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment, int d) {
102  typename InputIterator::FragmentIterator frag_iterator(fragment);
103  for (int h = 0; h < InputIterator::Iterations::kH; ++h) {
104  for (int w = 0; w < InputIterator::Iterations::kW; ++w) {
105  for (int c = 0; c < InputIterator::Iterations::kC; ++c) {
106  int const offset =
108  d, h, w, c);
109 
110  FragmentLoad<InputIterator::kIteratorFragment,
111  InputIterator::Tile::kC,
112  typename InputIterator::Scalar,
113  InputIterator::kMemorySpace,
114  typename InputIterator::FragmentElement,
115  InputIterator::Tile::kW>::load(frag_iterator.at(0, h, w, c),
116  iterator.data(),
117  offset);
118  }
119  }
120  }
121 }
122 
124 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
126  Fragment &fragment,
127  typename InputIterator::Index offset,
128  ConstPredicateAdapter predicate_adapter) {
129  for (int d = 0; d < InputIterator::Iterations::kD; ++d, iterator.inc_d()) {
130  for (int h = 0; h < InputIterator::Iterations::kH; ++h, iterator.inc_h()) {
131  for (int w = 0; w < InputIterator::Iterations::kW; ++w, iterator.inc_w()) {
132  if (predicate_adapter.at(d, h, w, 0)) {
133  int idx = InputIterator::Tile::kC *
134  (w + InputIterator::Iterations::kW * (h + InputIterator::Iterations::kH * d));
135 
137  load(reinterpret_cast<typename InputIterator::AccessType &>(fragment[idx]),
138  iterator.data(),
139  offset);
140  }
141  }
142  }
143  }
144 }
145 
147 template <typename InputIterator, typename Fragment>
149  Fragment &fragment,
150  typename InputIterator::Index offset = 0) {
152  iterator_load_post_increment(iterator, fragment, offset, pred);
153 }
154 
156 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
158  Fragment &fragment,
159  ConstPredicateAdapter pred_it) {
160  iterator_load_post_increment(iterator, fragment, 0, pred_it);
161 }
162 
163 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
164 CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &_iterator,
165  Fragment &fragment,
166  typename InputIterator::Index offset,
167  ConstPredicateAdapter predicate_adapter) {
168  InputIterator iterator(_iterator);
169  iterator_load_post_increment(iterator, fragment, offset, predicate_adapter);
170 }
171 
173 template <typename InputIterator, typename Fragment>
174 CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator,
175  Fragment &fragment,
176  typename InputIterator::Index offset = 0) {
178  iterator_load(iterator, fragment, offset, pred);
179 }
180 
182 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
183 CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator,
184  Fragment &fragment,
185  ConstPredicateAdapter pred_it) {
186  iterator_load(iterator, fragment, 0, pred_it);
187 }
188 
190 
192 template <typename OutputIterator, typename Fragment>
193 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment) {
194  typename OutputIterator::FragmentIterator frag_iterator(fragment);
195  for (int d = 0; d < OutputIterator::Iterations::kD; ++d) {
196  for (int h = 0; h < OutputIterator::Iterations::kH; ++h) {
197  for (int w = 0; w < OutputIterator::Iterations::kW; ++w) {
198  if (iterator.valid(d, h, w, 0)) {
199  int const offset =
201  d, h, w, 0);
202 
203  Store<typename Fragment::Element,
204  OutputIterator::Tile::kC,
205  OutputIterator::kMemorySpace>::
206  store(reinterpret_cast<typename OutputIterator::AccessType &>(
207  frag_iterator.at(d, h, w, 0)),
208  iterator.data(),
209  offset);
210  }
211  if (w < OutputIterator::Iterations::kW - 1) {
212  iterator.inc_w();
213  }
214  }
215  if (h < OutputIterator::Iterations::kH - 1) {
216  iterator.inc_h();
217  }
218  }
219  if (d < OutputIterator::Iterations::kD - 1) {
220  iterator.inc_d();
221  }
222  }
223  iterator.inc_advance();
224 }
225 
227 template <typename OutputIterator, typename Fragment>
228 CUTLASS_DEVICE void shared_iterator_store(OutputIterator &iterator, Fragment const &fragment) {
229  typename OutputIterator::FragmentConstIterator frag_iterator(fragment);
230  for (int d = 0; d < OutputIterator::Iterations::kD; ++d) {
231  for (int h = 0; h < OutputIterator::Iterations::kH; ++h) {
232  for (int w = 0; w < OutputIterator::Iterations::kW; ++w) {
233  for (int c = 0; c < OutputIterator::Iterations::kC; ++c) {
234  int const offset =
236  d, h, w, c);
237 
238  FragmentStore<OutputIterator::kIteratorFragment,
239  OutputIterator::Tile::kC,
240  typename OutputIterator::Scalar,
241  OutputIterator::kMemorySpace,
242  typename OutputIterator::FragmentElement,
243  OutputIterator::Tile::kW>::store(frag_iterator.at(d, h, w, c),
244  iterator.data(),
245  offset);
246  }
247  }
248  }
249  }
250 }
251 
253 
255 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
257  Fragment const &fragment,
258  typename OutputIterator::Index offset,
259  ConstPredicateAdapter predicate_adapter) {
260  for (int d = 0; d < OutputIterator::Iterations::kD; ++d, iterator.inc_d()) {
261  for (int h = 0; h < OutputIterator::Iterations::kH; ++h, iterator.inc_h()) {
262  for (int w = 0; w < OutputIterator::Iterations::kW; ++w, iterator.inc_w()) {
263  if (predicate_adapter.at(d, h, w, 0)) {
264  int idx = OutputIterator::Tile::kC *
265  (w + OutputIterator::Iterations::kW * (h + OutputIterator::Iterations::kH * d));
266 
267  Store<typename Fragment::Element,
268  OutputIterator::Tile::kC,
269  OutputIterator::kMemorySpace>::
270  store(reinterpret_cast<typename OutputIterator::AccessType const &>(fragment[idx]),
271  iterator.data(),
272  offset);
273  }
274  }
275  }
276  }
277 }
278 
280 template <typename OutputIterator, typename Fragment>
282  Fragment const &fragment,
283  typename OutputIterator::Index offset = 0) {
285  iterator_store_post_increment(iterator, fragment, offset, pred);
286 }
287 
289 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
291  Fragment const &fragment,
292  ConstPredicateAdapter pred_it) {
293  iterator_store_post_increment(iterator, fragment, 0, pred_it);
294 }
295 
297 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
298 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &_iterator,
299  Fragment const &fragment,
300  typename OutputIterator::Index offset,
301  ConstPredicateAdapter predicate_adapter) {
302  OutputIterator iterator(_iterator);
303  iterator_store_post_increment(iterator, fragment, offset, predicate_adapter);
304 }
305 
307 template <typename OutputIterator, typename Fragment>
308 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator,
309  Fragment const &fragment,
310  typename OutputIterator::Index offset = 0) {
312  iterator_store(iterator, fragment, offset, pred);
313 }
314 
316 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
317 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator,
318  Fragment const &fragment,
319  ConstPredicateAdapter pred_it) {
320  iterator_store(iterator, fragment, 0, pred_it);
321 }
322 
324 
325 } // namespace cutlass
Definition: fragment_load_store.h:43
+
Definition: convert.h:33
+
CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from a shared memory input iterator.
Definition: iterator_access.h:75
+
CUTLASS_HOST_DEVICE void iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
Stores a fragment to an output iterator, masked by a predicate iterator.
Definition: iterator_access.h:256
+
Defines accessors for loading and storing fragments to memory efficiently.
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:59
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
Definition: load_store.h:131
+
Defines container classes and iterators for managing a statically sized vector of boolean predicates...
+
static CUTLASS_DEVICE int get(int d, int h, int w, int c)
Definition: shape.h:211
+
CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
Loads a fragment from an input iterator, masked by a predicate iterator.
Definition: iterator_access.h:125
+
Defines abstractions for efficiently loading and storing vectors to memory.
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
CUTLASS_DEVICE void shared_iterator_store(OutputIterator &iterator, Fragment const &fragment)
Stores a fragment to a shared memory output iterator.
Definition: iterator_access.h:228
+
Element_ Element
The element.
Definition: fragment.h:108
+
Always returns true predicate.
Definition: predicate_vector.h:426
+
CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment)
Stores a fragment to an output iterator.
Definition: iterator_access.h:193
+
Definition: fragment_load_store.h:91
+
CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from an input iterator.
Definition: iterator_access.h:41
+
Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
+
+ + + + diff --git a/docs/generated-html/jquery.js b/docs/generated-html/jquery.js new file mode 100644 index 00000000..2771c749 --- /dev/null +++ b/docs/generated-html/jquery.js @@ -0,0 +1,115 @@ +/* + @licstart The following is the entire license notice for the + JavaScript code in this file. + + Copyright (C) 1997-2017 by Dimitri van Heesch + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + @licend The above is the entire license notice + for the JavaScript code in this file + */ +/*! + * jQuery JavaScript Library v1.7.1 + * http://jquery.com/ + * + * Copyright 2011, John Resig + * Dual licensed under the MIT or GPL Version 2 licenses. + * http://jquery.org/license + * + * Includes Sizzle.js + * http://sizzlejs.com/ + * Copyright 2011, The Dojo Foundation + * Released under the MIT, BSD, and GPL Licenses. 
+ * + * Date: Mon Nov 21 21:11:03 2011 -0500 + */ +(function(bb,L){var av=bb.document,bu=bb.navigator,bl=bb.location;var b=(function(){var bF=function(b0,b1){return new bF.fn.init(b0,b1,bD)},bU=bb.jQuery,bH=bb.$,bD,bY=/^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/,bM=/\S/,bI=/^\s+/,bE=/\s+$/,bA=/^<(\w+)\s*\/?>(?:<\/\1>)?$/,bN=/^[\],:{}\s]*$/,bW=/\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g,bP=/"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g,bJ=/(?:^|:|,)(?:\s*\[)+/g,by=/(webkit)[ \/]([\w.]+)/,bR=/(opera)(?:.*version)?[ \/]([\w.]+)/,bQ=/(msie) ([\w.]+)/,bS=/(mozilla)(?:.*? rv:([\w.]+))?/,bB=/-([a-z]|[0-9])/ig,bZ=/^-ms-/,bT=function(b0,b1){return(b1+"").toUpperCase()},bX=bu.userAgent,bV,bC,e,bL=Object.prototype.toString,bG=Object.prototype.hasOwnProperty,bz=Array.prototype.push,bK=Array.prototype.slice,bO=String.prototype.trim,bv=Array.prototype.indexOf,bx={};bF.fn=bF.prototype={constructor:bF,init:function(b0,b4,b3){var b2,b5,b1,b6;if(!b0){return this}if(b0.nodeType){this.context=this[0]=b0;this.length=1;return this}if(b0==="body"&&!b4&&av.body){this.context=av;this[0]=av.body;this.selector=b0;this.length=1;return this}if(typeof b0==="string"){if(b0.charAt(0)==="<"&&b0.charAt(b0.length-1)===">"&&b0.length>=3){b2=[null,b0,null]}else{b2=bY.exec(b0)}if(b2&&(b2[1]||!b4)){if(b2[1]){b4=b4 instanceof bF?b4[0]:b4;b6=(b4?b4.ownerDocument||b4:av);b1=bA.exec(b0);if(b1){if(bF.isPlainObject(b4)){b0=[av.createElement(b1[1])];bF.fn.attr.call(b0,b4,true)}else{b0=[b6.createElement(b1[1])]}}else{b1=bF.buildFragment([b2[1]],[b6]);b0=(b1.cacheable?bF.clone(b1.fragment):b1.fragment).childNodes}return bF.merge(this,b0)}else{b5=av.getElementById(b2[2]);if(b5&&b5.parentNode){if(b5.id!==b2[2]){return b3.find(b0)}this.length=1;this[0]=b5}this.context=av;this.selector=b0;return this}}else{if(!b4||b4.jquery){return(b4||b3).find(b0)}else{return this.constructor(b4).find(b0)}}}else{if(bF.isFunction(b0)){return 
b3.ready(b0)}}if(b0.selector!==L){this.selector=b0.selector;this.context=b0.context}return bF.makeArray(b0,this)},selector:"",jquery:"1.7.1",length:0,size:function(){return this.length},toArray:function(){return bK.call(this,0)},get:function(b0){return b0==null?this.toArray():(b0<0?this[this.length+b0]:this[b0])},pushStack:function(b1,b3,b0){var b2=this.constructor();if(bF.isArray(b1)){bz.apply(b2,b1)}else{bF.merge(b2,b1)}b2.prevObject=this;b2.context=this.context;if(b3==="find"){b2.selector=this.selector+(this.selector?" ":"")+b0}else{if(b3){b2.selector=this.selector+"."+b3+"("+b0+")"}}return b2},each:function(b1,b0){return bF.each(this,b1,b0)},ready:function(b0){bF.bindReady();bC.add(b0);return this},eq:function(b0){b0=+b0;return b0===-1?this.slice(b0):this.slice(b0,b0+1)},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},slice:function(){return this.pushStack(bK.apply(this,arguments),"slice",bK.call(arguments).join(","))},map:function(b0){return this.pushStack(bF.map(this,function(b2,b1){return b0.call(b2,b1,b2)}))},end:function(){return this.prevObject||this.constructor(null)},push:bz,sort:[].sort,splice:[].splice};bF.fn.init.prototype=bF.fn;bF.extend=bF.fn.extend=function(){var b9,b2,b0,b1,b6,b7,b5=arguments[0]||{},b4=1,b3=arguments.length,b8=false;if(typeof b5==="boolean"){b8=b5;b5=arguments[1]||{};b4=2}if(typeof b5!=="object"&&!bF.isFunction(b5)){b5={}}if(b3===b4){b5=this;--b4}for(;b40){return}bC.fireWith(av,[bF]);if(bF.fn.trigger){bF(av).trigger("ready").off("ready")}}},bindReady:function(){if(bC){return}bC=bF.Callbacks("once memory");if(av.readyState==="complete"){return setTimeout(bF.ready,1)}if(av.addEventListener){av.addEventListener("DOMContentLoaded",e,false);bb.addEventListener("load",bF.ready,false)}else{if(av.attachEvent){av.attachEvent("onreadystatechange",e);bb.attachEvent("onload",bF.ready);var b0=false;try{b0=bb.frameElement==null}catch(b1){}if(av.documentElement.doScroll&&b0){bw()}}}},isFunction:function(b0){return 
bF.type(b0)==="function"},isArray:Array.isArray||function(b0){return bF.type(b0)==="array"},isWindow:function(b0){return b0&&typeof b0==="object"&&"setInterval" in b0},isNumeric:function(b0){return !isNaN(parseFloat(b0))&&isFinite(b0)},type:function(b0){return b0==null?String(b0):bx[bL.call(b0)]||"object"},isPlainObject:function(b2){if(!b2||bF.type(b2)!=="object"||b2.nodeType||bF.isWindow(b2)){return false}try{if(b2.constructor&&!bG.call(b2,"constructor")&&!bG.call(b2.constructor.prototype,"isPrototypeOf")){return false}}catch(b1){return false}var b0;for(b0 in b2){}return b0===L||bG.call(b2,b0)},isEmptyObject:function(b1){for(var b0 in b1){return false}return true},error:function(b0){throw new Error(b0)},parseJSON:function(b0){if(typeof b0!=="string"||!b0){return null}b0=bF.trim(b0);if(bb.JSON&&bb.JSON.parse){return bb.JSON.parse(b0)}if(bN.test(b0.replace(bW,"@").replace(bP,"]").replace(bJ,""))){return(new Function("return "+b0))()}bF.error("Invalid JSON: "+b0)},parseXML:function(b2){var b0,b1;try{if(bb.DOMParser){b1=new DOMParser();b0=b1.parseFromString(b2,"text/xml")}else{b0=new ActiveXObject("Microsoft.XMLDOM");b0.async="false";b0.loadXML(b2)}}catch(b3){b0=L}if(!b0||!b0.documentElement||b0.getElementsByTagName("parsererror").length){bF.error("Invalid XML: "+b2)}return b0},noop:function(){},globalEval:function(b0){if(b0&&bM.test(b0)){(bb.execScript||function(b1){bb["eval"].call(bb,b1)})(b0)}},camelCase:function(b0){return b0.replace(bZ,"ms-").replace(bB,bT)},nodeName:function(b1,b0){return b1.nodeName&&b1.nodeName.toUpperCase()===b0.toUpperCase()},each:function(b3,b6,b2){var b1,b4=0,b5=b3.length,b0=b5===L||bF.isFunction(b3);if(b2){if(b0){for(b1 in b3){if(b6.apply(b3[b1],b2)===false){break}}}else{for(;b40&&b0[0]&&b0[b1-1])||b1===0||bF.isArray(b0));if(b3){for(;b21?aJ.call(arguments,0):bG;if(!(--bw)){bC.resolveWith(bC,bx)}}}function bz(bF){return function(bG){bB[bF]=arguments.length>1?aJ.call(arguments,0):bG;bC.notifyWith(bE,bB)}}if(e>1){for(;bv
a";bI=bv.getElementsByTagName("*");bF=bv.getElementsByTagName("a")[0];if(!bI||!bI.length||!bF){return{}}bG=av.createElement("select");bx=bG.appendChild(av.createElement("option"));bE=bv.getElementsByTagName("input")[0];bJ={leadingWhitespace:(bv.firstChild.nodeType===3),tbody:!bv.getElementsByTagName("tbody").length,htmlSerialize:!!bv.getElementsByTagName("link").length,style:/top/.test(bF.getAttribute("style")),hrefNormalized:(bF.getAttribute("href")==="/a"),opacity:/^0.55/.test(bF.style.opacity),cssFloat:!!bF.style.cssFloat,checkOn:(bE.value==="on"),optSelected:bx.selected,getSetAttribute:bv.className!=="t",enctype:!!av.createElement("form").enctype,html5Clone:av.createElement("nav").cloneNode(true).outerHTML!=="<:nav>",submitBubbles:true,changeBubbles:true,focusinBubbles:false,deleteExpando:true,noCloneEvent:true,inlineBlockNeedsLayout:false,shrinkWrapBlocks:false,reliableMarginRight:true};bE.checked=true;bJ.noCloneChecked=bE.cloneNode(true).checked;bG.disabled=true;bJ.optDisabled=!bx.disabled;try{delete bv.test}catch(bC){bJ.deleteExpando=false}if(!bv.addEventListener&&bv.attachEvent&&bv.fireEvent){bv.attachEvent("onclick",function(){bJ.noCloneEvent=false});bv.cloneNode(true).fireEvent("onclick")}bE=av.createElement("input");bE.value="t";bE.setAttribute("type","radio");bJ.radioValue=bE.value==="t";bE.setAttribute("checked","checked");bv.appendChild(bE);bD=av.createDocumentFragment();bD.appendChild(bv.lastChild);bJ.checkClone=bD.cloneNode(true).cloneNode(true).lastChild.checked;bJ.appendChecked=bE.checked;bD.removeChild(bE);bD.appendChild(bv);bv.innerHTML="";if(bb.getComputedStyle){bA=av.createElement("div");bA.style.width="0";bA.style.marginRight="0";bv.style.width="2px";bv.appendChild(bA);bJ.reliableMarginRight=(parseInt((bb.getComputedStyle(bA,null)||{marginRight:0}).marginRight,10)||0)===0}if(bv.attachEvent){for(by in {submit:1,change:1,focusin:1}){bB="on"+by;bw=(bB in bv);if(!bw){bv.setAttribute(bB,"return;");bw=(typeof 
bv[bB]==="function")}bJ[by+"Bubbles"]=bw}}bD.removeChild(bv);bD=bG=bx=bA=bv=bE=null;b(function(){var bM,bU,bV,bT,bN,bO,bL,bS,bR,e,bP,bQ=av.getElementsByTagName("body")[0];if(!bQ){return}bL=1;bS="position:absolute;top:0;left:0;width:1px;height:1px;margin:0;";bR="visibility:hidden;border:0;";e="style='"+bS+"border:5px solid #000;padding:0;'";bP="
";bM=av.createElement("div");bM.style.cssText=bR+"width:0;height:0;position:static;top:0;margin-top:"+bL+"px";bQ.insertBefore(bM,bQ.firstChild);bv=av.createElement("div");bM.appendChild(bv);bv.innerHTML="
t
";bz=bv.getElementsByTagName("td");bw=(bz[0].offsetHeight===0);bz[0].style.display="";bz[1].style.display="none";bJ.reliableHiddenOffsets=bw&&(bz[0].offsetHeight===0);bv.innerHTML="";bv.style.width=bv.style.paddingLeft="1px";b.boxModel=bJ.boxModel=bv.offsetWidth===2;if(typeof bv.style.zoom!=="undefined"){bv.style.display="inline";bv.style.zoom=1;bJ.inlineBlockNeedsLayout=(bv.offsetWidth===2);bv.style.display="";bv.innerHTML="
";bJ.shrinkWrapBlocks=(bv.offsetWidth!==2)}bv.style.cssText=bS+bR;bv.innerHTML=bP;bU=bv.firstChild;bV=bU.firstChild;bN=bU.nextSibling.firstChild.firstChild;bO={doesNotAddBorder:(bV.offsetTop!==5),doesAddBorderForTableAndCells:(bN.offsetTop===5)};bV.style.position="fixed";bV.style.top="20px";bO.fixedPosition=(bV.offsetTop===20||bV.offsetTop===15);bV.style.position=bV.style.top="";bU.style.overflow="hidden";bU.style.position="relative";bO.subtractsBorderForOverflowNotVisible=(bV.offsetTop===-5);bO.doesNotIncludeMarginInBodyOffset=(bQ.offsetTop!==bL);bQ.removeChild(bM);bv=bM=null;b.extend(bJ,bO)});return bJ})();var aS=/^(?:\{.*\}|\[.*\])$/,aA=/([A-Z])/g;b.extend({cache:{},uuid:0,expando:"jQuery"+(b.fn.jquery+Math.random()).replace(/\D/g,""),noData:{embed:true,object:"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000",applet:true},hasData:function(e){e=e.nodeType?b.cache[e[b.expando]]:e[b.expando];return !!e&&!S(e)},data:function(bx,bv,bz,by){if(!b.acceptData(bx)){return}var bG,bA,bD,bE=b.expando,bC=typeof bv==="string",bF=bx.nodeType,e=bF?b.cache:bx,bw=bF?bx[bE]:bx[bE]&&bE,bB=bv==="events";if((!bw||!e[bw]||(!bB&&!by&&!e[bw].data))&&bC&&bz===L){return}if(!bw){if(bF){bx[bE]=bw=++b.uuid}else{bw=bE}}if(!e[bw]){e[bw]={};if(!bF){e[bw].toJSON=b.noop}}if(typeof bv==="object"||typeof bv==="function"){if(by){e[bw]=b.extend(e[bw],bv)}else{e[bw].data=b.extend(e[bw].data,bv)}}bG=bA=e[bw];if(!by){if(!bA.data){bA.data={}}bA=bA.data}if(bz!==L){bA[b.camelCase(bv)]=bz}if(bB&&!bA[bv]){return bG.events}if(bC){bD=bA[bv];if(bD==null){bD=bA[b.camelCase(bv)]}}else{bD=bA}return bD},removeData:function(bx,bv,by){if(!b.acceptData(bx)){return}var bB,bA,bz,bC=b.expando,bD=bx.nodeType,e=bD?b.cache:bx,bw=bD?bx[bC]:bC;if(!e[bw]){return}if(bv){bB=by?e[bw]:e[bw].data;if(bB){if(!b.isArray(bv)){if(bv in bB){bv=[bv]}else{bv=b.camelCase(bv);if(bv in bB){bv=[bv]}else{bv=bv.split(" ")}}}for(bA=0,bz=bv.length;bA-1){return true}}return false},val:function(bx){var 
e,bv,by,bw=this[0];if(!arguments.length){if(bw){e=b.valHooks[bw.nodeName.toLowerCase()]||b.valHooks[bw.type];if(e&&"get" in e&&(bv=e.get(bw,"value"))!==L){return bv}bv=bw.value;return typeof bv==="string"?bv.replace(aU,""):bv==null?"":bv}return}by=b.isFunction(bx);return this.each(function(bA){var bz=b(this),bB;if(this.nodeType!==1){return}if(by){bB=bx.call(this,bA,bz.val())}else{bB=bx}if(bB==null){bB=""}else{if(typeof bB==="number"){bB+=""}else{if(b.isArray(bB)){bB=b.map(bB,function(bC){return bC==null?"":bC+""})}}}e=b.valHooks[this.nodeName.toLowerCase()]||b.valHooks[this.type];if(!e||!("set" in e)||e.set(this,bB,"value")===L){this.value=bB}})}});b.extend({valHooks:{option:{get:function(e){var bv=e.attributes.value;return !bv||bv.specified?e.value:e.text}},select:{get:function(e){var bA,bv,bz,bx,by=e.selectedIndex,bB=[],bC=e.options,bw=e.type==="select-one";if(by<0){return null}bv=bw?by:0;bz=bw?by+1:bC.length;for(;bv=0});if(!e.length){bv.selectedIndex=-1}return e}}},attrFn:{val:true,css:true,html:true,text:true,data:true,width:true,height:true,offset:true},attr:function(bA,bx,bB,bz){var bw,e,by,bv=bA.nodeType;if(!bA||bv===3||bv===8||bv===2){return}if(bz&&bx in b.attrFn){return b(bA)[bx](bB)}if(typeof bA.getAttribute==="undefined"){return b.prop(bA,bx,bB)}by=bv!==1||!b.isXMLDoc(bA);if(by){bx=bx.toLowerCase();e=b.attrHooks[bx]||(ao.test(bx)?aY:be)}if(bB!==L){if(bB===null){b.removeAttr(bA,bx);return}else{if(e&&"set" in e&&by&&(bw=e.set(bA,bB,bx))!==L){return bw}else{bA.setAttribute(bx,""+bB);return bB}}}else{if(e&&"get" in e&&by&&(bw=e.get(bA,bx))!==null){return bw}else{bw=bA.getAttribute(bx);return bw===null?L:bw}}},removeAttr:function(bx,bz){var by,bA,bv,e,bw=0;if(bz&&bx.nodeType===1){bA=bz.toLowerCase().split(af);e=bA.length;for(;bw=0)}}})});var 
bd=/^(?:textarea|input|select)$/i,n=/^([^\.]*)?(?:\.(.+))?$/,J=/\bhover(\.\S+)?\b/,aO=/^key/,bf=/^(?:mouse|contextmenu)|click/,T=/^(?:focusinfocus|focusoutblur)$/,U=/^(\w*)(?:#([\w\-]+))?(?:\.([\w\-]+))?$/,Y=function(e){var bv=U.exec(e);if(bv){bv[1]=(bv[1]||"").toLowerCase();bv[3]=bv[3]&&new RegExp("(?:^|\\s)"+bv[3]+"(?:\\s|$)")}return bv},j=function(bw,e){var bv=bw.attributes||{};return((!e[1]||bw.nodeName.toLowerCase()===e[1])&&(!e[2]||(bv.id||{}).value===e[2])&&(!e[3]||e[3].test((bv["class"]||{}).value)))},bt=function(e){return b.event.special.hover?e:e.replace(J,"mouseenter$1 mouseleave$1")};b.event={add:function(bx,bC,bJ,bA,by){var bD,bB,bK,bI,bH,bF,e,bG,bv,bz,bw,bE;if(bx.nodeType===3||bx.nodeType===8||!bC||!bJ||!(bD=b._data(bx))){return}if(bJ.handler){bv=bJ;bJ=bv.handler}if(!bJ.guid){bJ.guid=b.guid++}bK=bD.events;if(!bK){bD.events=bK={}}bB=bD.handle;if(!bB){bD.handle=bB=function(bL){return typeof b!=="undefined"&&(!bL||b.event.triggered!==bL.type)?b.event.dispatch.apply(bB.elem,arguments):L};bB.elem=bx}bC=b.trim(bt(bC)).split(" ");for(bI=0;bI=0){bG=bG.slice(0,-1);bw=true}if(bG.indexOf(".")>=0){bx=bG.split(".");bG=bx.shift();bx.sort()}if((!bA||b.event.customEvent[bG])&&!b.event.global[bG]){return}bv=typeof bv==="object"?bv[b.expando]?bv:new b.Event(bG,bv):new b.Event(bG);bv.type=bG;bv.isTrigger=true;bv.exclusive=bw;bv.namespace=bx.join(".");bv.namespace_re=bv.namespace?new RegExp("(^|\\.)"+bx.join("\\.(?:.*\\.)?")+"(\\.|$)"):null;by=bG.indexOf(":")<0?"on"+bG:"";if(!bA){e=b.cache;for(bC in 
e){if(e[bC].events&&e[bC].events[bG]){b.event.trigger(bv,bD,e[bC].handle.elem,true)}}return}bv.result=L;if(!bv.target){bv.target=bA}bD=bD!=null?b.makeArray(bD):[];bD.unshift(bv);bF=b.event.special[bG]||{};if(bF.trigger&&bF.trigger.apply(bA,bD)===false){return}bB=[[bA,bF.bindType||bG]];if(!bJ&&!bF.noBubble&&!b.isWindow(bA)){bI=bF.delegateType||bG;bH=T.test(bI+bG)?bA:bA.parentNode;bz=null;for(;bH;bH=bH.parentNode){bB.push([bH,bI]);bz=bH}if(bz&&bz===bA.ownerDocument){bB.push([bz.defaultView||bz.parentWindow||bb,bI])}}for(bC=0;bCbA){bH.push({elem:this,matches:bz.slice(bA)})}for(bC=0;bC0?this.on(e,null,bx,bw):this.trigger(e)};if(b.attrFn){b.attrFn[e]=true}if(aO.test(e)){b.event.fixHooks[e]=b.event.keyHooks}if(bf.test(e)){b.event.fixHooks[e]=b.event.mouseHooks}}); +/*! + * Sizzle CSS Selector Engine + * Copyright 2011, The Dojo Foundation + * Released under the MIT, BSD, and GPL Licenses. + * More information: http://sizzlejs.com/ + */ +(function(){var bH=/((?:\((?:\([^()]+\)|[^()]+)+\)|\[(?:\[[^\[\]]*\]|['"][^'"]*['"]|[^\[\]'"]+)+\]|\\.|[^ >+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g,bC="sizcache"+(Math.random()+"").replace(".",""),bI=0,bL=Object.prototype.toString,bB=false,bA=true,bK=/\\/g,bO=/\r\n/g,bQ=/\W/;[0,0].sort(function(){bA=false;return 0});var by=function(bV,e,bY,bZ){bY=bY||[];e=e||av;var b1=e;if(e.nodeType!==1&&e.nodeType!==9){return[]}if(!bV||typeof bV!=="string"){return bY}var 
bS,b3,b6,bR,b2,b5,b4,bX,bU=true,bT=by.isXML(e),bW=[],b0=bV;do{bH.exec("");bS=bH.exec(b0);if(bS){b0=bS[3];bW.push(bS[1]);if(bS[2]){bR=bS[3];break}}}while(bS);if(bW.length>1&&bD.exec(bV)){if(bW.length===2&&bE.relative[bW[0]]){b3=bM(bW[0]+bW[1],e,bZ)}else{b3=bE.relative[bW[0]]?[e]:by(bW.shift(),e);while(bW.length){bV=bW.shift();if(bE.relative[bV]){bV+=bW.shift()}b3=bM(bV,b3,bZ)}}}else{if(!bZ&&bW.length>1&&e.nodeType===9&&!bT&&bE.match.ID.test(bW[0])&&!bE.match.ID.test(bW[bW.length-1])){b2=by.find(bW.shift(),e,bT);e=b2.expr?by.filter(b2.expr,b2.set)[0]:b2.set[0]}if(e){b2=bZ?{expr:bW.pop(),set:bF(bZ)}:by.find(bW.pop(),bW.length===1&&(bW[0]==="~"||bW[0]==="+")&&e.parentNode?e.parentNode:e,bT);b3=b2.expr?by.filter(b2.expr,b2.set):b2.set;if(bW.length>0){b6=bF(b3)}else{bU=false}while(bW.length){b5=bW.pop();b4=b5;if(!bE.relative[b5]){b5=""}else{b4=bW.pop()}if(b4==null){b4=e}bE.relative[b5](b6,b4,bT)}}else{b6=bW=[]}}if(!b6){b6=b3}if(!b6){by.error(b5||bV)}if(bL.call(b6)==="[object Array]"){if(!bU){bY.push.apply(bY,b6)}else{if(e&&e.nodeType===1){for(bX=0;b6[bX]!=null;bX++){if(b6[bX]&&(b6[bX]===true||b6[bX].nodeType===1&&by.contains(e,b6[bX]))){bY.push(b3[bX])}}}else{for(bX=0;b6[bX]!=null;bX++){if(b6[bX]&&b6[bX].nodeType===1){bY.push(b3[bX])}}}}}else{bF(b6,bY)}if(bR){by(bR,b1,bY,bZ);by.uniqueSort(bY)}return bY};by.uniqueSort=function(bR){if(bJ){bB=bA;bR.sort(bJ);if(bB){for(var e=1;e0};by.find=function(bX,e,bY){var bW,bS,bU,bT,bV,bR;if(!bX){return[]}for(bS=0,bU=bE.order.length;bS":function(bW,bR){var bV,bU=typeof bR==="string",bS=0,e=bW.length;if(bU&&!bQ.test(bR)){bR=bR.toLowerCase();for(;bS=0)){if(!bS){e.push(bV)}}else{if(bS){bR[bU]=false}}}}return false},ID:function(e){return e[1].replace(bK,"")},TAG:function(bR,e){return bR[1].replace(bK,"").toLowerCase()},CHILD:function(e){if(e[1]==="nth"){if(!e[2]){by.error(e[0])}e[2]=e[2].replace(/^\+|\s*/g,"");var 
bR=/(-?)(\d*)(?:n([+\-]?\d*))?/.exec(e[2]==="even"&&"2n"||e[2]==="odd"&&"2n+1"||!/\D/.test(e[2])&&"0n+"+e[2]||e[2]);e[2]=(bR[1]+(bR[2]||1))-0;e[3]=bR[3]-0}else{if(e[2]){by.error(e[0])}}e[0]=bI++;return e},ATTR:function(bU,bR,bS,e,bV,bW){var bT=bU[1]=bU[1].replace(bK,"");if(!bW&&bE.attrMap[bT]){bU[1]=bE.attrMap[bT]}bU[4]=(bU[4]||bU[5]||"").replace(bK,"");if(bU[2]==="~="){bU[4]=" "+bU[4]+" "}return bU},PSEUDO:function(bU,bR,bS,e,bV){if(bU[1]==="not"){if((bH.exec(bU[3])||"").length>1||/^\w/.test(bU[3])){bU[3]=by(bU[3],null,null,bR)}else{var bT=by.filter(bU[3],bR,bS,true^bV);if(!bS){e.push.apply(e,bT)}return false}}else{if(bE.match.POS.test(bU[0])||bE.match.CHILD.test(bU[0])){return true}}return bU},POS:function(e){e.unshift(true);return e}},filters:{enabled:function(e){return e.disabled===false&&e.type!=="hidden"},disabled:function(e){return e.disabled===true},checked:function(e){return e.checked===true},selected:function(e){if(e.parentNode){e.parentNode.selectedIndex}return e.selected===true},parent:function(e){return !!e.firstChild},empty:function(e){return !e.firstChild},has:function(bS,bR,e){return !!by(e[3],bS).length},header:function(e){return(/h\d/i).test(e.nodeName)},text:function(bS){var e=bS.getAttribute("type"),bR=bS.type;return bS.nodeName.toLowerCase()==="input"&&"text"===bR&&(e===bR||e===null)},radio:function(e){return e.nodeName.toLowerCase()==="input"&&"radio"===e.type},checkbox:function(e){return e.nodeName.toLowerCase()==="input"&&"checkbox"===e.type},file:function(e){return e.nodeName.toLowerCase()==="input"&&"file"===e.type},password:function(e){return e.nodeName.toLowerCase()==="input"&&"password"===e.type},submit:function(bR){var e=bR.nodeName.toLowerCase();return(e==="input"||e==="button")&&"submit"===bR.type},image:function(e){return e.nodeName.toLowerCase()==="input"&&"image"===e.type},reset:function(bR){var e=bR.nodeName.toLowerCase();return(e==="input"||e==="button")&&"reset"===bR.type},button:function(bR){var 
e=bR.nodeName.toLowerCase();return e==="input"&&"button"===bR.type||e==="button"},input:function(e){return(/input|select|textarea|button/i).test(e.nodeName)},focus:function(e){return e===e.ownerDocument.activeElement}},setFilters:{first:function(bR,e){return e===0},last:function(bS,bR,e,bT){return bR===bT.length-1},even:function(bR,e){return e%2===0},odd:function(bR,e){return e%2===1},lt:function(bS,bR,e){return bRe[3]-0},nth:function(bS,bR,e){return e[3]-0===bR},eq:function(bS,bR,e){return e[3]-0===bR}},filter:{PSEUDO:function(bS,bX,bW,bY){var e=bX[1],bR=bE.filters[e];if(bR){return bR(bS,bW,bX,bY)}else{if(e==="contains"){return(bS.textContent||bS.innerText||bw([bS])||"").indexOf(bX[3])>=0}else{if(e==="not"){var bT=bX[3];for(var bV=0,bU=bT.length;bV=0)}}},ID:function(bR,e){return bR.nodeType===1&&bR.getAttribute("id")===e},TAG:function(bR,e){return(e==="*"&&bR.nodeType===1)||!!bR.nodeName&&bR.nodeName.toLowerCase()===e},CLASS:function(bR,e){return(" "+(bR.className||bR.getAttribute("class"))+" ").indexOf(e)>-1},ATTR:function(bV,bT){var bS=bT[1],e=by.attr?by.attr(bV,bS):bE.attrHandle[bS]?bE.attrHandle[bS](bV):bV[bS]!=null?bV[bS]:bV.getAttribute(bS),bW=e+"",bU=bT[2],bR=bT[4];return e==null?bU==="!=":!bU&&by.attr?e!=null:bU==="="?bW===bR:bU==="*="?bW.indexOf(bR)>=0:bU==="~="?(" "+bW+" ").indexOf(bR)>=0:!bR?bW&&e!==false:bU==="!="?bW!==bR:bU==="^="?bW.indexOf(bR)===0:bU==="$="?bW.substr(bW.length-bR.length)===bR:bU==="|="?bW===bR||bW.substr(0,bR.length+1)===bR+"-":false},POS:function(bU,bR,bS,bV){var e=bR[2],bT=bE.setFilters[e];if(bT){return bT(bU,bS,bR,bV)}}}};var bD=bE.match.POS,bx=function(bR,e){return"\\"+(e-0+1)};for(var bz in bE.match){bE.match[bz]=new RegExp(bE.match[bz].source+(/(?![^\[]*\])(?![^\(]*\))/.source));bE.leftMatch[bz]=new RegExp(/(^(?:.|\r|\n)*?)/.source+bE.match[bz].source.replace(/\\(\d+)/g,bx))}var bF=function(bR,e){bR=Array.prototype.slice.call(bR,0);if(e){e.push.apply(e,bR);return e}return 
bR};try{Array.prototype.slice.call(av.documentElement.childNodes,0)[0].nodeType}catch(bP){bF=function(bU,bT){var bS=0,bR=bT||[];if(bL.call(bU)==="[object Array]"){Array.prototype.push.apply(bR,bU)}else{if(typeof bU.length==="number"){for(var e=bU.length;bS";e.insertBefore(bR,e.firstChild);if(av.getElementById(bS)){bE.find.ID=function(bU,bV,bW){if(typeof bV.getElementById!=="undefined"&&!bW){var bT=bV.getElementById(bU[1]);return bT?bT.id===bU[1]||typeof bT.getAttributeNode!=="undefined"&&bT.getAttributeNode("id").nodeValue===bU[1]?[bT]:L:[]}};bE.filter.ID=function(bV,bT){var bU=typeof bV.getAttributeNode!=="undefined"&&bV.getAttributeNode("id");return bV.nodeType===1&&bU&&bU.nodeValue===bT}}e.removeChild(bR);e=bR=null})();(function(){var e=av.createElement("div");e.appendChild(av.createComment(""));if(e.getElementsByTagName("*").length>0){bE.find.TAG=function(bR,bV){var bU=bV.getElementsByTagName(bR[1]);if(bR[1]==="*"){var bT=[];for(var bS=0;bU[bS];bS++){if(bU[bS].nodeType===1){bT.push(bU[bS])}}bU=bT}return bU}}e.innerHTML="";if(e.firstChild&&typeof e.firstChild.getAttribute!=="undefined"&&e.firstChild.getAttribute("href")!=="#"){bE.attrHandle.href=function(bR){return bR.getAttribute("href",2)}}e=null})();if(av.querySelectorAll){(function(){var e=by,bT=av.createElement("div"),bS="__sizzle__";bT.innerHTML="

";if(bT.querySelectorAll&&bT.querySelectorAll(".TEST").length===0){return}by=function(b4,bV,bZ,b3){bV=bV||av;if(!b3&&!by.isXML(bV)){var b2=/^(\w+$)|^\.([\w\-]+$)|^#([\w\-]+$)/.exec(b4);if(b2&&(bV.nodeType===1||bV.nodeType===9)){if(b2[1]){return bF(bV.getElementsByTagName(b4),bZ)}else{if(b2[2]&&bE.find.CLASS&&bV.getElementsByClassName){return bF(bV.getElementsByClassName(b2[2]),bZ)}}}if(bV.nodeType===9){if(b4==="body"&&bV.body){return bF([bV.body],bZ)}else{if(b2&&b2[3]){var bY=bV.getElementById(b2[3]);if(bY&&bY.parentNode){if(bY.id===b2[3]){return bF([bY],bZ)}}else{return bF([],bZ)}}}try{return bF(bV.querySelectorAll(b4),bZ)}catch(b0){}}else{if(bV.nodeType===1&&bV.nodeName.toLowerCase()!=="object"){var bW=bV,bX=bV.getAttribute("id"),bU=bX||bS,b6=bV.parentNode,b5=/^\s*[+~]/.test(b4);if(!bX){bV.setAttribute("id",bU)}else{bU=bU.replace(/'/g,"\\$&")}if(b5&&b6){bV=bV.parentNode}try{if(!b5||b6){return bF(bV.querySelectorAll("[id='"+bU+"'] "+b4),bZ)}}catch(b1){}finally{if(!bX){bW.removeAttribute("id")}}}}}return e(b4,bV,bZ,b3)};for(var bR in e){by[bR]=e[bR]}bT=null})()}(function(){var e=av.documentElement,bS=e.matchesSelector||e.mozMatchesSelector||e.webkitMatchesSelector||e.msMatchesSelector;if(bS){var bU=!bS.call(av.createElement("div"),"div"),bR=false;try{bS.call(av.documentElement,"[test!='']:sizzle")}catch(bT){bR=true}by.matchesSelector=function(bW,bY){bY=bY.replace(/\=\s*([^'"\]]*)\s*\]/g,"='$1']");if(!by.isXML(bW)){try{if(bR||!bE.match.PSEUDO.test(bY)&&!/!=/.test(bY)){var bV=bS.call(bW,bY);if(bV||!bU||bW.document&&bW.document.nodeType!==11){return bV}}}catch(bX){}}return by(bY,null,null,[bW]).length>0}}})();(function(){var e=av.createElement("div");e.innerHTML="
";if(!e.getElementsByClassName||e.getElementsByClassName("e").length===0){return}e.lastChild.className="e";if(e.getElementsByClassName("e").length===1){return}bE.order.splice(1,0,"CLASS");bE.find.CLASS=function(bR,bS,bT){if(typeof bS.getElementsByClassName!=="undefined"&&!bT){return bS.getElementsByClassName(bR[1])}};e=null})();function bv(bR,bW,bV,bZ,bX,bY){for(var bT=0,bS=bZ.length;bT0){bU=e;break}}}e=e[bR]}bZ[bT]=bU}}}if(av.documentElement.contains){by.contains=function(bR,e){return bR!==e&&(bR.contains?bR.contains(e):true)}}else{if(av.documentElement.compareDocumentPosition){by.contains=function(bR,e){return !!(bR.compareDocumentPosition(e)&16)}}else{by.contains=function(){return false}}}by.isXML=function(e){var bR=(e?e.ownerDocument||e:0).documentElement;return bR?bR.nodeName!=="HTML":false};var bM=function(bS,e,bW){var bV,bX=[],bU="",bY=e.nodeType?[e]:e;while((bV=bE.match.PSEUDO.exec(bS))){bU+=bV[0];bS=bS.replace(bE.match.PSEUDO,"")}bS=bE.relative[bS]?bS+"*":bS;for(var bT=0,bR=bY.length;bT0){for(bB=bA;bB=0:b.filter(e,this).length>0:this.filter(e).length>0)},closest:function(by,bx){var bv=[],bw,e,bz=this[0];if(b.isArray(by)){var bB=1;while(bz&&bz.ownerDocument&&bz!==bx){for(bw=0;bw-1:b.find.matchesSelector(bz,by)){bv.push(bz);break}else{bz=bz.parentNode;if(!bz||!bz.ownerDocument||bz===bx||bz.nodeType===11){break}}}}bv=bv.length>1?b.unique(bv):bv;return this.pushStack(bv,"closest",by)},index:function(e){if(!e){return(this[0]&&this[0].parentNode)?this.prevAll().length:-1}if(typeof e==="string"){return b.inArray(this[0],b(e))}return b.inArray(e.jquery?e[0]:e,this)},add:function(e,bv){var bx=typeof e==="string"?b(e,bv):b.makeArray(e&&e.nodeType?[e]:e),bw=b.merge(this.get(),bx);return this.pushStack(C(bx[0])||C(bw[0])?bw:b.unique(bw))},andSelf:function(){return this.add(this.prevObject)}});function C(e){return !e||!e.parentNode||e.parentNode.nodeType===11}b.each({parent:function(bv){var e=bv.parentNode;return e&&e.nodeType!==11?e:null},parents:function(e){return 
b.dir(e,"parentNode")},parentsUntil:function(bv,e,bw){return b.dir(bv,"parentNode",bw)},next:function(e){return b.nth(e,2,"nextSibling")},prev:function(e){return b.nth(e,2,"previousSibling")},nextAll:function(e){return b.dir(e,"nextSibling")},prevAll:function(e){return b.dir(e,"previousSibling")},nextUntil:function(bv,e,bw){return b.dir(bv,"nextSibling",bw)},prevUntil:function(bv,e,bw){return b.dir(bv,"previousSibling",bw)},siblings:function(e){return b.sibling(e.parentNode.firstChild,e)},children:function(e){return b.sibling(e.firstChild)},contents:function(e){return b.nodeName(e,"iframe")?e.contentDocument||e.contentWindow.document:b.makeArray(e.childNodes)}},function(e,bv){b.fn[e]=function(by,bw){var bx=b.map(this,bv,by);if(!ab.test(e)){bw=by}if(bw&&typeof bw==="string"){bx=b.filter(bw,bx)}bx=this.length>1&&!ay[e]?b.unique(bx):bx;if((this.length>1||a9.test(bw))&&aq.test(e)){bx=bx.reverse()}return this.pushStack(bx,e,P.call(arguments).join(","))}});b.extend({filter:function(bw,e,bv){if(bv){bw=":not("+bw+")"}return e.length===1?b.find.matchesSelector(e[0],bw)?[e[0]]:[]:b.find.matches(bw,e)},dir:function(bw,bv,by){var e=[],bx=bw[bv];while(bx&&bx.nodeType!==9&&(by===L||bx.nodeType!==1||!b(bx).is(by))){if(bx.nodeType===1){e.push(bx)}bx=bx[bv]}return e},nth:function(by,e,bw,bx){e=e||1;var bv=0;for(;by;by=by[bw]){if(by.nodeType===1&&++bv===e){break}}return by},sibling:function(bw,bv){var e=[];for(;bw;bw=bw.nextSibling){if(bw.nodeType===1&&bw!==bv){e.push(bw)}}return e}});function aG(bx,bw,e){bw=bw||0;if(b.isFunction(bw)){return b.grep(bx,function(bz,by){var bA=!!bw.call(bz,by,bz);return bA===e})}else{if(bw.nodeType){return b.grep(bx,function(bz,by){return(bz===bw)===e})}else{if(typeof bw==="string"){var bv=b.grep(bx,function(by){return by.nodeType===1});if(bp.test(bw)){return b.filter(bw,bv,!e)}else{bw=b.filter(bw,bv)}}}}return b.grep(bx,function(bz,by){return(b.inArray(bz,bw)>=0)===e})}function a(e){var 
bw=aR.split("|"),bv=e.createDocumentFragment();if(bv.createElement){while(bw.length){bv.createElement(bw.pop())}}return bv}var aR="abbr|article|aside|audio|canvas|datalist|details|figcaption|figure|footer|header|hgroup|mark|meter|nav|output|progress|section|summary|time|video",ag=/ jQuery\d+="(?:\d+|null)"/g,ar=/^\s+/,R=/<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/ig,d=/<([\w:]+)/,w=/",""],legend:[1,"
","
"],thead:[1,"","
"],tr:[2,"","
"],td:[3,"","
"],col:[2,"","
"],area:[1,"",""],_default:[0,"",""]},ac=a(av);ax.optgroup=ax.option;ax.tbody=ax.tfoot=ax.colgroup=ax.caption=ax.thead;ax.th=ax.td;if(!b.support.htmlSerialize){ax._default=[1,"div
","
"]}b.fn.extend({text:function(e){if(b.isFunction(e)){return this.each(function(bw){var bv=b(this);bv.text(e.call(this,bw,bv.text()))})}if(typeof e!=="object"&&e!==L){return this.empty().append((this[0]&&this[0].ownerDocument||av).createTextNode(e))}return b.text(this)},wrapAll:function(e){if(b.isFunction(e)){return this.each(function(bw){b(this).wrapAll(e.call(this,bw))})}if(this[0]){var bv=b(e,this[0].ownerDocument).eq(0).clone(true);if(this[0].parentNode){bv.insertBefore(this[0])}bv.map(function(){var bw=this;while(bw.firstChild&&bw.firstChild.nodeType===1){bw=bw.firstChild}return bw}).append(this)}return this},wrapInner:function(e){if(b.isFunction(e)){return this.each(function(bv){b(this).wrapInner(e.call(this,bv))})}return this.each(function(){var bv=b(this),bw=bv.contents();if(bw.length){bw.wrapAll(e)}else{bv.append(e)}})},wrap:function(e){var bv=b.isFunction(e);return this.each(function(bw){b(this).wrapAll(bv?e.call(this,bw):e)})},unwrap:function(){return this.parent().each(function(){if(!b.nodeName(this,"body")){b(this).replaceWith(this.childNodes)}}).end()},append:function(){return this.domManip(arguments,true,function(e){if(this.nodeType===1){this.appendChild(e)}})},prepend:function(){return this.domManip(arguments,true,function(e){if(this.nodeType===1){this.insertBefore(e,this.firstChild)}})},before:function(){if(this[0]&&this[0].parentNode){return this.domManip(arguments,false,function(bv){this.parentNode.insertBefore(bv,this)})}else{if(arguments.length){var e=b.clean(arguments);e.push.apply(e,this.toArray());return this.pushStack(e,"before",arguments)}}},after:function(){if(this[0]&&this[0].parentNode){return this.domManip(arguments,false,function(bv){this.parentNode.insertBefore(bv,this.nextSibling)})}else{if(arguments.length){var e=this.pushStack(this,"after",arguments);e.push.apply(e,b.clean(arguments));return e}}},remove:function(e,bx){for(var 
bv=0,bw;(bw=this[bv])!=null;bv++){if(!e||b.filter(e,[bw]).length){if(!bx&&bw.nodeType===1){b.cleanData(bw.getElementsByTagName("*"));b.cleanData([bw])}if(bw.parentNode){bw.parentNode.removeChild(bw)}}}return this},empty:function(){for(var e=0,bv;(bv=this[e])!=null;e++){if(bv.nodeType===1){b.cleanData(bv.getElementsByTagName("*"))}while(bv.firstChild){bv.removeChild(bv.firstChild)}}return this},clone:function(bv,e){bv=bv==null?false:bv;e=e==null?bv:e;return this.map(function(){return b.clone(this,bv,e)})},html:function(bx){if(bx===L){return this[0]&&this[0].nodeType===1?this[0].innerHTML.replace(ag,""):null}else{if(typeof bx==="string"&&!ae.test(bx)&&(b.support.leadingWhitespace||!ar.test(bx))&&!ax[(d.exec(bx)||["",""])[1].toLowerCase()]){bx=bx.replace(R,"<$1>");try{for(var bw=0,bv=this.length;bw1&&bw0?this.clone(true):this).get();b(bC[bA])[bv](by);bz=bz.concat(by)}return this.pushStack(bz,e,bC.selector)}}});function bg(e){if(typeof e.getElementsByTagName!=="undefined"){return e.getElementsByTagName("*")}else{if(typeof e.querySelectorAll!=="undefined"){return e.querySelectorAll("*")}else{return[]}}}function az(e){if(e.type==="checkbox"||e.type==="radio"){e.defaultChecked=e.checked}}function E(e){var bv=(e.nodeName||"").toLowerCase();if(bv==="input"){az(e)}else{if(bv!=="script"&&typeof e.getElementsByTagName!=="undefined"){b.grep(e.getElementsByTagName("input"),az)}}}function al(e){var bv=av.createElement("div");ac.appendChild(bv);bv.innerHTML=e.outerHTML;return bv.firstChild}b.extend({clone:function(by,bA,bw){var e,bv,bx,bz=b.support.html5Clone||!ah.test("<"+by.nodeName)?by.cloneNode(true):al(by);if((!b.support.noCloneEvent||!b.support.noCloneChecked)&&(by.nodeType===1||by.nodeType===11)&&!b.isXMLDoc(by)){ai(by,bz);e=bg(by);bv=bg(bz);for(bx=0;e[bx];++bx){if(bv[bx]){ai(e[bx],bv[bx])}}}if(bA){t(by,bz);if(bw){e=bg(by);bv=bg(bz);for(bx=0;e[bx];++bx){t(e[bx],bv[bx])}}}e=bv=null;return bz},clean:function(bw,by,bH,bA){var bF;by=by||av;if(typeof 
by.createElement==="undefined"){by=by.ownerDocument||by[0]&&by[0].ownerDocument||av}var bI=[],bB;for(var bE=0,bz;(bz=bw[bE])!=null;bE++){if(typeof bz==="number"){bz+=""}if(!bz){continue}if(typeof bz==="string"){if(!W.test(bz)){bz=by.createTextNode(bz)}else{bz=bz.replace(R,"<$1>");var bK=(d.exec(bz)||["",""])[1].toLowerCase(),bx=ax[bK]||ax._default,bD=bx[0],bv=by.createElement("div");if(by===av){ac.appendChild(bv)}else{a(by).appendChild(bv)}bv.innerHTML=bx[1]+bz+bx[2];while(bD--){bv=bv.lastChild}if(!b.support.tbody){var e=w.test(bz),bC=bK==="table"&&!e?bv.firstChild&&bv.firstChild.childNodes:bx[1]===""&&!e?bv.childNodes:[];for(bB=bC.length-1;bB>=0;--bB){if(b.nodeName(bC[bB],"tbody")&&!bC[bB].childNodes.length){bC[bB].parentNode.removeChild(bC[bB])}}}if(!b.support.leadingWhitespace&&ar.test(bz)){bv.insertBefore(by.createTextNode(ar.exec(bz)[0]),bv.firstChild)}bz=bv.childNodes}}var bG;if(!b.support.appendChecked){if(bz[0]&&typeof(bG=bz.length)==="number"){for(bB=0;bB=0){return bx+"px"}}else{return bx}}}});if(!b.support.opacity){b.cssHooks.opacity={get:function(bv,e){return au.test((e&&bv.currentStyle?bv.currentStyle.filter:bv.style.filter)||"")?(parseFloat(RegExp.$1)/100)+"":e?"1":""},set:function(by,bz){var bx=by.style,bv=by.currentStyle,e=b.isNumeric(bz)?"alpha(opacity="+bz*100+")":"",bw=bv&&bv.filter||bx.filter||"";bx.zoom=1;if(bz>=1&&b.trim(bw.replace(ak,""))===""){bx.removeAttribute("filter");if(bv&&!bv.filter){return}}bx.filter=ak.test(bw)?bw.replace(ak,e):bw+" "+e}}}b(function(){if(!b.support.reliableMarginRight){b.cssHooks.marginRight={get:function(bw,bv){var e;b.swap(bw,{display:"inline-block"},function(){if(bv){e=Z(bw,"margin-right","marginRight")}else{e=bw.style.marginRight}});return e}}}});if(av.defaultView&&av.defaultView.getComputedStyle){aI=function(by,bw){var 
bv,bx,e;bw=bw.replace(z,"-$1").toLowerCase();if((bx=by.ownerDocument.defaultView)&&(e=bx.getComputedStyle(by,null))){bv=e.getPropertyValue(bw);if(bv===""&&!b.contains(by.ownerDocument.documentElement,by)){bv=b.style(by,bw)}}return bv}}if(av.documentElement.currentStyle){aX=function(bz,bw){var bA,e,by,bv=bz.currentStyle&&bz.currentStyle[bw],bx=bz.style;if(bv===null&&bx&&(by=bx[bw])){bv=by}if(!bc.test(bv)&&bn.test(bv)){bA=bx.left;e=bz.runtimeStyle&&bz.runtimeStyle.left;if(e){bz.runtimeStyle.left=bz.currentStyle.left}bx.left=bw==="fontSize"?"1em":(bv||0);bv=bx.pixelLeft+"px";bx.left=bA;if(e){bz.runtimeStyle.left=e}}return bv===""?"auto":bv}}Z=aI||aX;function p(by,bw,bv){var bA=bw==="width"?by.offsetWidth:by.offsetHeight,bz=bw==="width"?an:a1,bx=0,e=bz.length;if(bA>0){if(bv!=="border"){for(;bx)<[^<]*)*<\/script>/gi,q=/^(?:select|textarea)/i,h=/\s+/,br=/([?&])_=[^&]*/,K=/^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+))?)?/,A=b.fn.load,aa={},r={},aE,s,aV=["*/"]+["*"];try{aE=bl.href}catch(aw){aE=av.createElement("a");aE.href="";aE=aE.href}s=K.exec(aE.toLowerCase())||[];function f(e){return function(by,bA){if(typeof by!=="string"){bA=by;by="*"}if(b.isFunction(bA)){var bx=by.toLowerCase().split(h),bw=0,bz=bx.length,bv,bB,bC;for(;bw=0){var e=bw.slice(by,bw.length);bw=bw.slice(0,by)}var bx="GET";if(bz){if(b.isFunction(bz)){bA=bz;bz=L}else{if(typeof bz==="object"){bz=b.param(bz,b.ajaxSettings.traditional);bx="POST"}}}var bv=this;b.ajax({url:bw,type:bx,dataType:"html",data:bz,complete:function(bC,bB,bD){bD=bC.responseText;if(bC.isResolved()){bC.done(function(bE){bD=bE});bv.html(e?b("
").append(bD.replace(a6,"")).find(e):bD)}if(bA){bv.each(bA,[bD,bB,bC])}}});return this},serialize:function(){return b.param(this.serializeArray())},serializeArray:function(){return this.map(function(){return this.elements?b.makeArray(this.elements):this}).filter(function(){return this.name&&!this.disabled&&(this.checked||q.test(this.nodeName)||aZ.test(this.type))}).map(function(e,bv){var bw=b(this).val();return bw==null?null:b.isArray(bw)?b.map(bw,function(by,bx){return{name:bv.name,value:by.replace(bs,"\r\n")}}):{name:bv.name,value:bw.replace(bs,"\r\n")}}).get()}});b.each("ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split(" "),function(e,bv){b.fn[bv]=function(bw){return this.on(bv,bw)}});b.each(["get","post"],function(e,bv){b[bv]=function(bw,by,bz,bx){if(b.isFunction(by)){bx=bx||bz;bz=by;by=L}return b.ajax({type:bv,url:bw,data:by,success:bz,dataType:bx})}});b.extend({getScript:function(e,bv){return b.get(e,L,bv,"script")},getJSON:function(e,bv,bw){return b.get(e,bv,bw,"json")},ajaxSetup:function(bv,e){if(e){am(bv,b.ajaxSettings)}else{e=bv;bv=b.ajaxSettings}am(bv,e);return bv},ajaxSettings:{url:aE,isLocal:aM.test(s[1]),global:true,type:"GET",contentType:"application/x-www-form-urlencoded",processData:true,async:true,accepts:{xml:"application/xml, text/xml",html:"text/html",text:"text/plain",json:"application/json, text/javascript","*":aV},contents:{xml:/xml/,html:/html/,json:/json/},responseFields:{xml:"responseXML",text:"responseText"},converters:{"* text":bb.String,"text html":true,"text json":b.parseJSON,"text xml":b.parseXML},flatOptions:{context:true,url:true}},ajaxPrefilter:f(aa),ajaxTransport:f(r),ajax:function(bz,bx){if(typeof bz==="object"){bx=bz;bz=L}bx=bx||{};var bD=b.ajaxSetup({},bx),bS=bD.context||bD,bG=bS!==bD&&(bS.nodeType||bS instanceof b)?b(bS):b.event,bR=b.Deferred(),bN=b.Callbacks("once memory"),bB=bD.statusCode||{},bC,bH={},bO={},bQ,by,bL,bE,bI,bA=0,bw,bK,bJ={readyState:0,setRequestHeader:function(bT,bU){if(!bA){var 
e=bT.toLowerCase();bT=bO[e]=bO[e]||bT;bH[bT]=bU}return this},getAllResponseHeaders:function(){return bA===2?bQ:null},getResponseHeader:function(bT){var e;if(bA===2){if(!by){by={};while((e=aD.exec(bQ))){by[e[1].toLowerCase()]=e[2]}}e=by[bT.toLowerCase()]}return e===L?null:e},overrideMimeType:function(e){if(!bA){bD.mimeType=e}return this},abort:function(e){e=e||"abort";if(bL){bL.abort(e)}bF(0,e);return this}};function bF(bZ,bU,b0,bW){if(bA===2){return}bA=2;if(bE){clearTimeout(bE)}bL=L;bQ=bW||"";bJ.readyState=bZ>0?4:0;var bT,b4,b3,bX=bU,bY=b0?bj(bD,bJ,b0):L,bV,b2;if(bZ>=200&&bZ<300||bZ===304){if(bD.ifModified){if((bV=bJ.getResponseHeader("Last-Modified"))){b.lastModified[bC]=bV}if((b2=bJ.getResponseHeader("Etag"))){b.etag[bC]=b2}}if(bZ===304){bX="notmodified";bT=true}else{try{b4=G(bD,bY);bX="success";bT=true}catch(b1){bX="parsererror";b3=b1}}}else{b3=bX;if(!bX||bZ){bX="error";if(bZ<0){bZ=0}}}bJ.status=bZ;bJ.statusText=""+(bU||bX);if(bT){bR.resolveWith(bS,[b4,bX,bJ])}else{bR.rejectWith(bS,[bJ,bX,b3])}bJ.statusCode(bB);bB=L;if(bw){bG.trigger("ajax"+(bT?"Success":"Error"),[bJ,bD,bT?b4:b3])}bN.fireWith(bS,[bJ,bX]);if(bw){bG.trigger("ajaxComplete",[bJ,bD]);if(!(--b.active)){b.event.trigger("ajaxStop")}}}bR.promise(bJ);bJ.success=bJ.done;bJ.error=bJ.fail;bJ.complete=bN.add;bJ.statusCode=function(bT){if(bT){var e;if(bA<2){for(e in bT){bB[e]=[bB[e],bT[e]]}}else{e=bT[bJ.status];bJ.then(e,e)}}return this};bD.url=((bz||bD.url)+"").replace(bq,"").replace(c,s[1]+"//");bD.dataTypes=b.trim(bD.dataType||"*").toLowerCase().split(h);if(bD.crossDomain==null){bI=K.exec(bD.url.toLowerCase());bD.crossDomain=!!(bI&&(bI[1]!=s[1]||bI[2]!=s[2]||(bI[3]||(bI[1]==="http:"?80:443))!=(s[3]||(s[1]==="http:"?80:443))))}if(bD.data&&bD.processData&&typeof bD.data!=="string"){bD.data=b.param(bD.data,bD.traditional)}aW(aa,bD,bx,bJ);if(bA===2){return 
false}bw=bD.global;bD.type=bD.type.toUpperCase();bD.hasContent=!aQ.test(bD.type);if(bw&&b.active++===0){b.event.trigger("ajaxStart")}if(!bD.hasContent){if(bD.data){bD.url+=(M.test(bD.url)?"&":"?")+bD.data;delete bD.data}bC=bD.url;if(bD.cache===false){var bv=b.now(),bP=bD.url.replace(br,"$1_="+bv);bD.url=bP+((bP===bD.url)?(M.test(bD.url)?"&":"?")+"_="+bv:"")}}if(bD.data&&bD.hasContent&&bD.contentType!==false||bx.contentType){bJ.setRequestHeader("Content-Type",bD.contentType)}if(bD.ifModified){bC=bC||bD.url;if(b.lastModified[bC]){bJ.setRequestHeader("If-Modified-Since",b.lastModified[bC])}if(b.etag[bC]){bJ.setRequestHeader("If-None-Match",b.etag[bC])}}bJ.setRequestHeader("Accept",bD.dataTypes[0]&&bD.accepts[bD.dataTypes[0]]?bD.accepts[bD.dataTypes[0]]+(bD.dataTypes[0]!=="*"?", "+aV+"; q=0.01":""):bD.accepts["*"]);for(bK in bD.headers){bJ.setRequestHeader(bK,bD.headers[bK])}if(bD.beforeSend&&(bD.beforeSend.call(bS,bJ,bD)===false||bA===2)){bJ.abort();return false}for(bK in {success:1,error:1,complete:1}){bJ[bK](bD[bK])}bL=aW(r,bD,bx,bJ);if(!bL){bF(-1,"No Transport")}else{bJ.readyState=1;if(bw){bG.trigger("ajaxSend",[bJ,bD])}if(bD.async&&bD.timeout>0){bE=setTimeout(function(){bJ.abort("timeout")},bD.timeout)}try{bA=1;bL.send(bH,bF)}catch(bM){if(bA<2){bF(-1,bM)}else{throw bM}}}return bJ},param:function(e,bw){var bv=[],by=function(bz,bA){bA=b.isFunction(bA)?bA():bA;bv[bv.length]=encodeURIComponent(bz)+"="+encodeURIComponent(bA)};if(bw===L){bw=b.ajaxSettings.traditional}if(b.isArray(e)||(e.jquery&&!b.isPlainObject(e))){b.each(e,function(){by(this.name,this.value)})}else{for(var bx in e){v(bx,e[bx],bw,by)}}return bv.join("&").replace(k,"+")}});function v(bw,by,bv,bx){if(b.isArray(by)){b.each(by,function(bA,bz){if(bv||ap.test(bw)){bx(bw,bz)}else{v(bw+"["+(typeof bz==="object"||b.isArray(bz)?bA:"")+"]",bz,bv,bx)}})}else{if(!bv&&by!=null&&typeof by==="object"){for(var e in by){v(bw+"["+e+"]",by[e],bv,bx)}}else{bx(bw,by)}}}b.extend({active:0,lastModified:{},etag:{}});function 
bj(bD,bC,bz){var bv=bD.contents,bB=bD.dataTypes,bw=bD.responseFields,by,bA,bx,e;for(bA in bw){if(bA in bz){bC[bw[bA]]=bz[bA]}}while(bB[0]==="*"){bB.shift();if(by===L){by=bD.mimeType||bC.getResponseHeader("content-type")}}if(by){for(bA in bv){if(bv[bA]&&bv[bA].test(by)){bB.unshift(bA);break}}}if(bB[0] in bz){bx=bB[0]}else{for(bA in bz){if(!bB[0]||bD.converters[bA+" "+bB[0]]){bx=bA;break}if(!e){e=bA}}bx=bx||e}if(bx){if(bx!==bB[0]){bB.unshift(bx)}return bz[bx]}}function G(bH,bz){if(bH.dataFilter){bz=bH.dataFilter(bz,bH.dataType)}var bD=bH.dataTypes,bG={},bA,bE,bw=bD.length,bB,bC=bD[0],bx,by,bF,bv,e;for(bA=1;bA=bw.duration+this.startTime){this.now=this.end;this.pos=this.state=1;this.update();bw.animatedProperties[this.prop]=true;for(bA in bw.animatedProperties){if(bw.animatedProperties[bA]!==true){e=false}}if(e){if(bw.overflow!=null&&!b.support.shrinkWrapBlocks){b.each(["","X","Y"],function(bC,bD){bz.style["overflow"+bD]=bw.overflow[bC]})}if(bw.hide){b(bz).hide()}if(bw.hide||bw.show){for(bA in bw.animatedProperties){b.style(bz,bA,bw.orig[bA]);b.removeData(bz,"fxshow"+bA,true);b.removeData(bz,"toggle"+bA,true)}}bv=bw.complete;if(bv){bw.complete=false;bv.call(bz)}}return false}else{if(bw.duration==Infinity){this.now=bx}else{bB=bx-this.startTime;this.state=bB/bw.duration;this.pos=b.easing[bw.animatedProperties[this.prop]](this.state,bB,0,1,bw.duration);this.now=this.start+((this.end-this.start)*this.pos)}this.update()}return true}};b.extend(b.fx,{tick:function(){var bw,bv=b.timers,e=0;for(;e").appendTo(e),bw=bv.css("display");bv.remove();if(bw==="none"||bw===""){if(!a8){a8=av.createElement("iframe");a8.frameBorder=a8.width=a8.height=0}e.appendChild(a8);if(!m||!a8.createElement){m=(a8.contentWindow||a8.contentDocument).document;m.write((av.compatMode==="CSS1Compat"?"":"")+"");m.close()}bv=m.createElement(bx);m.body.appendChild(bv);bw=b.css(bv,"display");e.removeChild(a8)}Q[bx]=bw}return Q[bx]}var V=/^t(?:able|d|h)$/i,ad=/^(?:body|html)$/i;if("getBoundingClientRect" in 
av.documentElement){b.fn.offset=function(bI){var by=this[0],bB;if(bI){return this.each(function(e){b.offset.setOffset(this,bI,e)})}if(!by||!by.ownerDocument){return null}if(by===by.ownerDocument.body){return b.offset.bodyOffset(by)}try{bB=by.getBoundingClientRect()}catch(bF){}var bH=by.ownerDocument,bw=bH.documentElement;if(!bB||!b.contains(bw,by)){return bB?{top:bB.top,left:bB.left}:{top:0,left:0}}var bC=bH.body,bD=aK(bH),bA=bw.clientTop||bC.clientTop||0,bE=bw.clientLeft||bC.clientLeft||0,bv=bD.pageYOffset||b.support.boxModel&&bw.scrollTop||bC.scrollTop,bz=bD.pageXOffset||b.support.boxModel&&bw.scrollLeft||bC.scrollLeft,bG=bB.top+bv-bA,bx=bB.left+bz-bE;return{top:bG,left:bx}}}else{b.fn.offset=function(bF){var bz=this[0];if(bF){return this.each(function(bG){b.offset.setOffset(this,bF,bG)})}if(!bz||!bz.ownerDocument){return null}if(bz===bz.ownerDocument.body){return b.offset.bodyOffset(bz)}var bC,bw=bz.offsetParent,bv=bz,bE=bz.ownerDocument,bx=bE.documentElement,bA=bE.body,bB=bE.defaultView,e=bB?bB.getComputedStyle(bz,null):bz.currentStyle,bD=bz.offsetTop,by=bz.offsetLeft;while((bz=bz.parentNode)&&bz!==bA&&bz!==bx){if(b.support.fixedPosition&&e.position==="fixed"){break}bC=bB?bB.getComputedStyle(bz,null):bz.currentStyle;bD-=bz.scrollTop;by-=bz.scrollLeft;if(bz===bw){bD+=bz.offsetTop;by+=bz.offsetLeft;if(b.support.doesNotAddBorder&&!(b.support.doesAddBorderForTableAndCells&&V.test(bz.nodeName))){bD+=parseFloat(bC.borderTopWidth)||0;by+=parseFloat(bC.borderLeftWidth)||0}bv=bw;bw=bz.offsetParent}if(b.support.subtractsBorderForOverflowNotVisible&&bC.overflow!=="visible"){bD+=parseFloat(bC.borderTopWidth)||0;by+=parseFloat(bC.borderLeftWidth)||0}e=bC}if(e.position==="relative"||e.position==="static"){bD+=bA.offsetTop;by+=bA.offsetLeft}if(b.support.fixedPosition&&e.position==="fixed"){bD+=Math.max(bx.scrollTop,bA.scrollTop);by+=Math.max(bx.scrollLeft,bA.scrollLeft)}return{top:bD,left:by}}}b.offset={bodyOffset:function(e){var 
bw=e.offsetTop,bv=e.offsetLeft;if(b.support.doesNotIncludeMarginInBodyOffset){bw+=parseFloat(b.css(e,"marginTop"))||0;bv+=parseFloat(b.css(e,"marginLeft"))||0}return{top:bw,left:bv}},setOffset:function(bx,bG,bA){var bB=b.css(bx,"position");if(bB==="static"){bx.style.position="relative"}var bz=b(bx),bv=bz.offset(),e=b.css(bx,"top"),bE=b.css(bx,"left"),bF=(bB==="absolute"||bB==="fixed")&&b.inArray("auto",[e,bE])>-1,bD={},bC={},bw,by;if(bF){bC=bz.position();bw=bC.top;by=bC.left}else{bw=parseFloat(e)||0;by=parseFloat(bE)||0}if(b.isFunction(bG)){bG=bG.call(bx,bA,bv)}if(bG.top!=null){bD.top=(bG.top-bv.top)+bw}if(bG.left!=null){bD.left=(bG.left-bv.left)+by}if("using" in bG){bG.using.call(bx,bD)}else{bz.css(bD)}}};b.fn.extend({position:function(){if(!this[0]){return null}var bw=this[0],bv=this.offsetParent(),bx=this.offset(),e=ad.test(bv[0].nodeName)?{top:0,left:0}:bv.offset();bx.top-=parseFloat(b.css(bw,"marginTop"))||0;bx.left-=parseFloat(b.css(bw,"marginLeft"))||0;e.top+=parseFloat(b.css(bv[0],"borderTopWidth"))||0;e.left+=parseFloat(b.css(bv[0],"borderLeftWidth"))||0;return{top:bx.top-e.top,left:bx.left-e.left}},offsetParent:function(){return this.map(function(){var e=this.offsetParent||av.body;while(e&&(!ad.test(e.nodeName)&&b.css(e,"position")==="static")){e=e.offsetParent}return e})}});b.each(["Left","Top"],function(bv,e){var bw="scroll"+e;b.fn[bw]=function(bz){var bx,by;if(bz===L){bx=this[0];if(!bx){return null}by=aK(bx);return by?("pageXOffset" in by)?by[bv?"pageYOffset":"pageXOffset"]:b.support.boxModel&&by.document.documentElement[bw]||by.document.body[bw]:bx[bw]}return this.each(function(){by=aK(this);if(by){by.scrollTo(!bv?bz:b(by).scrollLeft(),bv?bz:b(by).scrollTop())}else{this[bw]=bz}})}});function aK(e){return b.isWindow(e)?e:e.nodeType===9?e.defaultView||e.parentWindow:false}b.each(["Height","Width"],function(bv,e){var bw=e.toLowerCase();b.fn["inner"+e]=function(){var bx=this[0];return 
bx?bx.style?parseFloat(b.css(bx,bw,"padding")):this[bw]():null};b.fn["outer"+e]=function(by){var bx=this[0];return bx?bx.style?parseFloat(b.css(bx,bw,by?"margin":"border")):this[bw]():null};b.fn[bw]=function(bz){var bA=this[0];if(!bA){return bz==null?null:this}if(b.isFunction(bz)){return this.each(function(bE){var bD=b(this);bD[bw](bz.call(this,bE,bD[bw]()))})}if(b.isWindow(bA)){var bB=bA.document.documentElement["client"+e],bx=bA.document.body;return bA.document.compatMode==="CSS1Compat"&&bB||bx&&bx["client"+e]||bB}else{if(bA.nodeType===9){return Math.max(bA.documentElement["client"+e],bA.body["scroll"+e],bA.documentElement["scroll"+e],bA.body["offset"+e],bA.documentElement["offset"+e])}else{if(bz===L){var bC=b.css(bA,bw),by=parseFloat(bC);return b.isNumeric(by)?by:bC}else{return this.css(bw,typeof bz==="string"?bz:bz+"px")}}}}});bb.jQuery=bb.$=b;if(typeof define==="function"&&define.amd&&define.amd.jQuery){define("jquery",[],function(){return b})}})(window);/*! + * jQuery UI 1.8.18 + * + * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about) + * Dual licensed under the MIT or GPL Version 2 licenses. 
+ * http://jquery.org/license + * + * http://docs.jquery.com/UI + */ +(function(a,d){a.ui=a.ui||{};if(a.ui.version){return}a.extend(a.ui,{version:"1.8.18",keyCode:{ALT:18,BACKSPACE:8,CAPS_LOCK:20,COMMA:188,COMMAND:91,COMMAND_LEFT:91,COMMAND_RIGHT:93,CONTROL:17,DELETE:46,DOWN:40,END:35,ENTER:13,ESCAPE:27,HOME:36,INSERT:45,LEFT:37,MENU:93,NUMPAD_ADD:107,NUMPAD_DECIMAL:110,NUMPAD_DIVIDE:111,NUMPAD_ENTER:108,NUMPAD_MULTIPLY:106,NUMPAD_SUBTRACT:109,PAGE_DOWN:34,PAGE_UP:33,PERIOD:190,RIGHT:39,SHIFT:16,SPACE:32,TAB:9,UP:38,WINDOWS:91}});a.fn.extend({propAttr:a.fn.prop||a.fn.attr,_focus:a.fn.focus,focus:function(e,f){return typeof e==="number"?this.each(function(){var g=this;setTimeout(function(){a(g).focus();if(f){f.call(g)}},e)}):this._focus.apply(this,arguments)},scrollParent:function(){var e;if((a.browser.msie&&(/(static|relative)/).test(this.css("position")))||(/absolute/).test(this.css("position"))){e=this.parents().filter(function(){return(/(relative|absolute|fixed)/).test(a.curCSS(this,"position",1))&&(/(auto|scroll)/).test(a.curCSS(this,"overflow",1)+a.curCSS(this,"overflow-y",1)+a.curCSS(this,"overflow-x",1))}).eq(0)}else{e=this.parents().filter(function(){return(/(auto|scroll)/).test(a.curCSS(this,"overflow",1)+a.curCSS(this,"overflow-y",1)+a.curCSS(this,"overflow-x",1))}).eq(0)}return(/fixed/).test(this.css("position"))||!e.length?a(document):e},zIndex:function(h){if(h!==d){return this.css("zIndex",h)}if(this.length){var f=a(this[0]),e,g;while(f.length&&f[0]!==document){e=f.css("position");if(e==="absolute"||e==="relative"||e==="fixed"){g=parseInt(f.css("zIndex"),10);if(!isNaN(g)&&g!==0){return g}}f=f.parent()}}return 0},disableSelection:function(){return this.bind((a.support.selectstart?"selectstart":"mousedown")+".ui-disableSelection",function(e){e.preventDefault()})},enableSelection:function(){return this.unbind(".ui-disableSelection")}});a.each(["Width","Height"],function(g,e){var 
f=e==="Width"?["Left","Right"]:["Top","Bottom"],h=e.toLowerCase(),k={innerWidth:a.fn.innerWidth,innerHeight:a.fn.innerHeight,outerWidth:a.fn.outerWidth,outerHeight:a.fn.outerHeight};function j(m,l,i,n){a.each(f,function(){l-=parseFloat(a.curCSS(m,"padding"+this,true))||0;if(i){l-=parseFloat(a.curCSS(m,"border"+this+"Width",true))||0}if(n){l-=parseFloat(a.curCSS(m,"margin"+this,true))||0}});return l}a.fn["inner"+e]=function(i){if(i===d){return k["inner"+e].call(this)}return this.each(function(){a(this).css(h,j(this,i)+"px")})};a.fn["outer"+e]=function(i,l){if(typeof i!=="number"){return k["outer"+e].call(this,i)}return this.each(function(){a(this).css(h,j(this,i,true,l)+"px")})}});function c(g,e){var j=g.nodeName.toLowerCase();if("area"===j){var i=g.parentNode,h=i.name,f;if(!g.href||!h||i.nodeName.toLowerCase()!=="map"){return false}f=a("img[usemap=#"+h+"]")[0];return !!f&&b(f)}return(/input|select|textarea|button|object/.test(j)?!g.disabled:"a"==j?g.href||e:e)&&b(g)}function b(e){return !a(e).parents().andSelf().filter(function(){return a.curCSS(this,"visibility")==="hidden"||a.expr.filters.hidden(this)}).length}a.extend(a.expr[":"],{data:function(g,f,e){return !!a.data(g,e[3])},focusable:function(e){return c(e,!isNaN(a.attr(e,"tabindex")))},tabbable:function(g){var e=a.attr(g,"tabindex"),f=isNaN(e);return(f||e>=0)&&c(g,!f)}});a(function(){var e=document.body,f=e.appendChild(f=document.createElement("div"));f.offsetHeight;a.extend(f.style,{minHeight:"100px",height:"auto",padding:0,borderWidth:0});a.support.minHeight=f.offsetHeight===100;a.support.selectstart="onselectstart" in f;e.removeChild(f).style.display="none"});a.extend(a.ui,{plugin:{add:function(f,g,j){var h=a.ui[f].prototype;for(var e in j){h.plugins[e]=h.plugins[e]||[];h.plugins[e].push([g,j[e]])}},call:function(e,g,f){var j=e.plugins[g];if(!j||!e.element[0].parentNode){return}for(var h=0;h0){return true}h[e]=1;g=(h[e]>0);h[e]=0;return 
g},isOverAxis:function(f,e,g){return(f>e)&&(f<(e+g))},isOver:function(j,f,i,h,e,g){return a.ui.isOverAxis(j,i,e)&&a.ui.isOverAxis(f,h,g)}})})(jQuery);/*! + * jQuery UI Widget 1.8.18 + * + * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about) + * Dual licensed under the MIT or GPL Version 2 licenses. + * http://jquery.org/license + * + * http://docs.jquery.com/UI/Widget + */ +(function(b,d){if(b.cleanData){var c=b.cleanData;b.cleanData=function(f){for(var g=0,h;(h=f[g])!=null;g++){try{b(h).triggerHandler("remove")}catch(j){}}c(f)}}else{var a=b.fn.remove;b.fn.remove=function(e,f){return this.each(function(){if(!f){if(!e||b.filter(e,[this]).length){b("*",this).add([this]).each(function(){try{b(this).triggerHandler("remove")}catch(g){}})}}return a.call(b(this),e,f)})}}b.widget=function(f,h,e){var g=f.split(".")[0],j;f=f.split(".")[1];j=g+"-"+f;if(!e){e=h;h=b.Widget}b.expr[":"][j]=function(k){return !!b.data(k,f)};b[g]=b[g]||{};b[g][f]=function(k,l){if(arguments.length){this._createWidget(k,l)}};var i=new h();i.options=b.extend(true,{},i.options);b[g][f].prototype=b.extend(true,i,{namespace:g,widgetName:f,widgetEventPrefix:b[g][f].prototype.widgetEventPrefix||f,widgetBaseClass:j},e);b.widget.bridge(f,b[g][f])};b.widget.bridge=function(f,e){b.fn[f]=function(i){var g=typeof i==="string",h=Array.prototype.slice.call(arguments,1),j=this;i=!g&&h.length?b.extend.apply(null,[true,i].concat(h)):i;if(g&&i.charAt(0)==="_"){return j}if(g){this.each(function(){var k=b.data(this,f),l=k&&b.isFunction(k[i])?k[i].apply(k,h):k;if(l!==k&&l!==d){j=l;return false}})}else{this.each(function(){var k=b.data(this,f);if(k){k.option(i||{})._init()}else{b.data(this,f,new e(i,this))}})}return 
j}};b.Widget=function(e,f){if(arguments.length){this._createWidget(e,f)}};b.Widget.prototype={widgetName:"widget",widgetEventPrefix:"",options:{disabled:false},_createWidget:function(f,g){b.data(g,this.widgetName,this);this.element=b(g);this.options=b.extend(true,{},this.options,this._getCreateOptions(),f);var e=this;this.element.bind("remove."+this.widgetName,function(){e.destroy()});this._create();this._trigger("create");this._init()},_getCreateOptions:function(){return b.metadata&&b.metadata.get(this.element[0])[this.widgetName]},_create:function(){},_init:function(){},destroy:function(){this.element.unbind("."+this.widgetName).removeData(this.widgetName);this.widget().unbind("."+this.widgetName).removeAttr("aria-disabled").removeClass(this.widgetBaseClass+"-disabled ui-state-disabled")},widget:function(){return this.element},option:function(f,g){var e=f;if(arguments.length===0){return b.extend({},this.options)}if(typeof f==="string"){if(g===d){return this.options[f]}e={};e[f]=g}this._setOptions(e);return this},_setOptions:function(f){var e=this;b.each(f,function(g,h){e._setOption(g,h)});return this},_setOption:function(e,f){this.options[e]=f;if(e==="disabled"){this.widget()[f?"addClass":"removeClass"](this.widgetBaseClass+"-disabled ui-state-disabled").attr("aria-disabled",f)}return this},enable:function(){return this._setOption("disabled",false)},disable:function(){return this._setOption("disabled",true)},_trigger:function(e,f,g){var j,i,h=this.options[e];g=g||{};f=b.Event(f);f.type=(e===this.widgetEventPrefix?e:this.widgetEventPrefix+e).toLowerCase();f.target=this.element[0];i=f.originalEvent;if(i){for(j in i){if(!(j in f)){f[j]=i[j]}}}this.element.trigger(f,g);return !(b.isFunction(h)&&h.call(this.element[0],f,g)===false||f.isDefaultPrevented())}}})(jQuery);/*! + * jQuery UI Mouse 1.8.18 + * + * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about) + * Dual licensed under the MIT or GPL Version 2 licenses. 
+ * http://jquery.org/license + * + * http://docs.jquery.com/UI/Mouse + * + * Depends: + * jquery.ui.widget.js + */ +(function(b,c){var a=false;b(document).mouseup(function(d){a=false});b.widget("ui.mouse",{options:{cancel:":input,option",distance:1,delay:0},_mouseInit:function(){var d=this;this.element.bind("mousedown."+this.widgetName,function(e){return d._mouseDown(e)}).bind("click."+this.widgetName,function(e){if(true===b.data(e.target,d.widgetName+".preventClickEvent")){b.removeData(e.target,d.widgetName+".preventClickEvent");e.stopImmediatePropagation();return false}});this.started=false},_mouseDestroy:function(){this.element.unbind("."+this.widgetName)},_mouseDown:function(f){if(a){return}(this._mouseStarted&&this._mouseUp(f));this._mouseDownEvent=f;var e=this,g=(f.which==1),d=(typeof this.options.cancel=="string"&&f.target.nodeName?b(f.target).closest(this.options.cancel).length:false);if(!g||d||!this._mouseCapture(f)){return true}this.mouseDelayMet=!this.options.delay;if(!this.mouseDelayMet){this._mouseDelayTimer=setTimeout(function(){e.mouseDelayMet=true},this.options.delay)}if(this._mouseDistanceMet(f)&&this._mouseDelayMet(f)){this._mouseStarted=(this._mouseStart(f)!==false);if(!this._mouseStarted){f.preventDefault();return true}}if(true===b.data(f.target,this.widgetName+".preventClickEvent")){b.removeData(f.target,this.widgetName+".preventClickEvent")}this._mouseMoveDelegate=function(h){return e._mouseMove(h)};this._mouseUpDelegate=function(h){return e._mouseUp(h)};b(document).bind("mousemove."+this.widgetName,this._mouseMoveDelegate).bind("mouseup."+this.widgetName,this._mouseUpDelegate);f.preventDefault();a=true;return true},_mouseMove:function(d){if(b.browser.msie&&!(document.documentMode>=9)&&!d.button){return this._mouseUp(d)}if(this._mouseStarted){this._mouseDrag(d);return 
d.preventDefault()}if(this._mouseDistanceMet(d)&&this._mouseDelayMet(d)){this._mouseStarted=(this._mouseStart(this._mouseDownEvent,d)!==false);(this._mouseStarted?this._mouseDrag(d):this._mouseUp(d))}return !this._mouseStarted},_mouseUp:function(d){b(document).unbind("mousemove."+this.widgetName,this._mouseMoveDelegate).unbind("mouseup."+this.widgetName,this._mouseUpDelegate);if(this._mouseStarted){this._mouseStarted=false;if(d.target==this._mouseDownEvent.target){b.data(d.target,this.widgetName+".preventClickEvent",true)}this._mouseStop(d)}return false},_mouseDistanceMet:function(d){return(Math.max(Math.abs(this._mouseDownEvent.pageX-d.pageX),Math.abs(this._mouseDownEvent.pageY-d.pageY))>=this.options.distance)},_mouseDelayMet:function(d){return this.mouseDelayMet},_mouseStart:function(d){},_mouseDrag:function(d){},_mouseStop:function(d){},_mouseCapture:function(d){return true}})})(jQuery);(function(c,d){c.widget("ui.resizable",c.ui.mouse,{widgetEventPrefix:"resize",options:{alsoResize:false,animate:false,animateDuration:"slow",animateEasing:"swing",aspectRatio:false,autoHide:false,containment:false,ghost:false,grid:false,handles:"e,s,se",helper:false,maxHeight:null,maxWidth:null,minHeight:10,minWidth:10,zIndex:1000},_create:function(){var f=this,k=this.options;this.element.addClass("ui-resizable");c.extend(this,{_aspectRatio:!!(k.aspectRatio),aspectRatio:k.aspectRatio,originalElement:this.element,_proportionallyResizeElements:[],_helper:k.helper||k.ghost||k.animate?k.helper||"ui-resizable-helper":null});if(this.element[0].nodeName.match(/canvas|textarea|input|select|button|img/i)){this.element.wrap(c('
').css({position:this.element.css("position"),width:this.element.outerWidth(),height:this.element.outerHeight(),top:this.element.css("top"),left:this.element.css("left")}));this.element=this.element.parent().data("resizable",this.element.data("resizable"));this.elementIsWrapper=true;this.element.css({marginLeft:this.originalElement.css("marginLeft"),marginTop:this.originalElement.css("marginTop"),marginRight:this.originalElement.css("marginRight"),marginBottom:this.originalElement.css("marginBottom")});this.originalElement.css({marginLeft:0,marginTop:0,marginRight:0,marginBottom:0});this.originalResizeStyle=this.originalElement.css("resize");this.originalElement.css("resize","none");this._proportionallyResizeElements.push(this.originalElement.css({position:"static",zoom:1,display:"block"}));this.originalElement.css({margin:this.originalElement.css("margin")});this._proportionallyResize()}this.handles=k.handles||(!c(".ui-resizable-handle",this.element).length?"e,s,se":{n:".ui-resizable-n",e:".ui-resizable-e",s:".ui-resizable-s",w:".ui-resizable-w",se:".ui-resizable-se",sw:".ui-resizable-sw",ne:".ui-resizable-ne",nw:".ui-resizable-nw"});if(this.handles.constructor==String){if(this.handles=="all"){this.handles="n,e,s,w,se,sw,ne,nw"}var l=this.handles.split(",");this.handles={};for(var g=0;g
');if(/sw|se|ne|nw/.test(j)){h.css({zIndex:++k.zIndex})}if("se"==j){h.addClass("ui-icon ui-icon-gripsmall-diagonal-se")}this.handles[j]=".ui-resizable-"+j;this.element.append(h)}}this._renderAxis=function(q){q=q||this.element;for(var n in this.handles){if(this.handles[n].constructor==String){this.handles[n]=c(this.handles[n],this.element).show()}if(this.elementIsWrapper&&this.originalElement[0].nodeName.match(/textarea|input|select|button/i)){var o=c(this.handles[n],this.element),p=0;p=/sw|ne|nw|se|n|s/.test(n)?o.outerHeight():o.outerWidth();var m=["padding",/ne|nw|n/.test(n)?"Top":/se|sw|s/.test(n)?"Bottom":/^e$/.test(n)?"Right":"Left"].join("");q.css(m,p);this._proportionallyResize()}if(!c(this.handles[n]).length){continue}}};this._renderAxis(this.element);this._handles=c(".ui-resizable-handle",this.element).disableSelection();this._handles.mouseover(function(){if(!f.resizing){if(this.className){var i=this.className.match(/ui-resizable-(se|sw|ne|nw|n|e|s|w)/i)}f.axis=i&&i[1]?i[1]:"se"}});if(k.autoHide){this._handles.hide();c(this.element).addClass("ui-resizable-autohide").hover(function(){if(k.disabled){return}c(this).removeClass("ui-resizable-autohide");f._handles.show()},function(){if(k.disabled){return}if(!f.resizing){c(this).addClass("ui-resizable-autohide");f._handles.hide()}})}this._mouseInit()},destroy:function(){this._mouseDestroy();var e=function(g){c(g).removeClass("ui-resizable ui-resizable-disabled ui-resizable-resizing").removeData("resizable").unbind(".resizable").find(".ui-resizable-handle").remove()};if(this.elementIsWrapper){e(this.element);var f=this.element;f.after(this.originalElement.css({position:f.css("position"),width:f.outerWidth(),height:f.outerHeight(),top:f.css("top"),left:f.css("left")})).remove()}this.originalElement.css("resize",this.originalResizeStyle);e(this.originalElement);return this},_mouseCapture:function(f){var g=false;for(var e in this.handles){if(c(this.handles[e])[0]==f.target){g=true}}return 
!this.options.disabled&&g},_mouseStart:function(g){var j=this.options,f=this.element.position(),e=this.element;this.resizing=true;this.documentScroll={top:c(document).scrollTop(),left:c(document).scrollLeft()};if(e.is(".ui-draggable")||(/absolute/).test(e.css("position"))){e.css({position:"absolute",top:f.top,left:f.left})}this._renderProxy();var k=b(this.helper.css("left")),h=b(this.helper.css("top"));if(j.containment){k+=c(j.containment).scrollLeft()||0;h+=c(j.containment).scrollTop()||0}this.offset=this.helper.offset();this.position={left:k,top:h};this.size=this._helper?{width:e.outerWidth(),height:e.outerHeight()}:{width:e.width(),height:e.height()};this.originalSize=this._helper?{width:e.outerWidth(),height:e.outerHeight()}:{width:e.width(),height:e.height()};this.originalPosition={left:k,top:h};this.sizeDiff={width:e.outerWidth()-e.width(),height:e.outerHeight()-e.height()};this.originalMousePosition={left:g.pageX,top:g.pageY};this.aspectRatio=(typeof j.aspectRatio=="number")?j.aspectRatio:((this.originalSize.width/this.originalSize.height)||1);var i=c(".ui-resizable-"+this.axis).css("cursor");c("body").css("cursor",i=="auto"?this.axis+"-resize":i);e.addClass("ui-resizable-resizing");this._propagate("start",g);return true},_mouseDrag:function(e){var h=this.helper,g=this.options,m={},q=this,j=this.originalMousePosition,n=this.axis;var r=(e.pageX-j.left)||0,p=(e.pageY-j.top)||0;var i=this._change[n];if(!i){return false}var l=i.apply(this,[e,r,p]),k=c.browser.msie&&c.browser.version<7,f=this.sizeDiff;this._updateVirtualBoundaries(e.shiftKey);if(this._aspectRatio||e.shiftKey){l=this._updateRatio(l,e)}l=this._respectSize(l,e);this._propagate("resize",e);h.css({top:this.position.top+"px",left:this.position.left+"px",width:this.size.width+"px",height:this.size.height+"px"});if(!this._helper&&this._proportionallyResizeElements.length){this._proportionallyResize()}this._updateCache(l);this._trigger("resize",e,this.ui());return 
false},_mouseStop:function(h){this.resizing=false;var i=this.options,m=this;if(this._helper){var g=this._proportionallyResizeElements,e=g.length&&(/textarea/i).test(g[0].nodeName),f=e&&c.ui.hasScroll(g[0],"left")?0:m.sizeDiff.height,k=e?0:m.sizeDiff.width;var n={width:(m.helper.width()-k),height:(m.helper.height()-f)},j=(parseInt(m.element.css("left"),10)+(m.position.left-m.originalPosition.left))||null,l=(parseInt(m.element.css("top"),10)+(m.position.top-m.originalPosition.top))||null;if(!i.animate){this.element.css(c.extend(n,{top:l,left:j}))}m.helper.height(m.size.height);m.helper.width(m.size.width);if(this._helper&&!i.animate){this._proportionallyResize()}}c("body").css("cursor","auto");this.element.removeClass("ui-resizable-resizing");this._propagate("stop",h);if(this._helper){this.helper.remove()}return false},_updateVirtualBoundaries:function(g){var j=this.options,i,h,f,k,e;e={minWidth:a(j.minWidth)?j.minWidth:0,maxWidth:a(j.maxWidth)?j.maxWidth:Infinity,minHeight:a(j.minHeight)?j.minHeight:0,maxHeight:a(j.maxHeight)?j.maxHeight:Infinity};if(this._aspectRatio||g){i=e.minHeight*this.aspectRatio;f=e.minWidth/this.aspectRatio;h=e.maxHeight*this.aspectRatio;k=e.maxWidth/this.aspectRatio;if(i>e.minWidth){e.minWidth=i}if(f>e.minHeight){e.minHeight=f}if(hl.width),s=a(l.height)&&i.minHeight&&(i.minHeight>l.height);if(h){l.width=i.minWidth}if(s){l.height=i.minHeight}if(t){l.width=i.maxWidth}if(m){l.height=i.maxHeight}var f=this.originalPosition.left+this.originalSize.width,p=this.position.top+this.size.height;var k=/sw|nw|w/.test(q),e=/nw|ne|n/.test(q);if(h&&k){l.left=f-i.minWidth}if(t&&k){l.left=f-i.maxWidth}if(s&&e){l.top=p-i.minHeight}if(m&&e){l.top=p-i.maxHeight}var n=!l.width&&!l.height;if(n&&!l.left&&l.top){l.top=null}else{if(n&&!l.top&&l.left){l.left=null}}return l},_proportionallyResize:function(){var k=this.options;if(!this._proportionallyResizeElements.length){return}var g=this.helper||this.element;for(var f=0;f');var 
e=c.browser.msie&&c.browser.version<7,g=(e?1:0),h=(e?2:-1);this.helper.addClass(this._helper).css({width:this.element.outerWidth()+h,height:this.element.outerHeight()+h,position:"absolute",left:this.elementOffset.left-g+"px",top:this.elementOffset.top-g+"px",zIndex:++i.zIndex});this.helper.appendTo("body").disableSelection()}else{this.helper=this.element}},_change:{e:function(g,f,e){return{width:this.originalSize.width+f}},w:function(h,f,e){var j=this.options,g=this.originalSize,i=this.originalPosition;return{left:i.left+f,width:g.width-f}},n:function(h,f,e){var j=this.options,g=this.originalSize,i=this.originalPosition;return{top:i.top+e,height:g.height-e}},s:function(g,f,e){return{height:this.originalSize.height+e}},se:function(g,f,e){return c.extend(this._change.s.apply(this,arguments),this._change.e.apply(this,[g,f,e]))},sw:function(g,f,e){return c.extend(this._change.s.apply(this,arguments),this._change.w.apply(this,[g,f,e]))},ne:function(g,f,e){return c.extend(this._change.n.apply(this,arguments),this._change.e.apply(this,[g,f,e]))},nw:function(g,f,e){return c.extend(this._change.n.apply(this,arguments),this._change.w.apply(this,[g,f,e]))}},_propagate:function(f,e){c.ui.plugin.call(this,f,[e,this.ui()]);(f!="resize"&&this._trigger(f,e,this.ui()))},plugins:{},ui:function(){return{originalElement:this.originalElement,element:this.element,helper:this.helper,position:this.position,size:this.size,originalSize:this.originalSize,originalPosition:this.originalPosition}}});c.extend(c.ui.resizable,{version:"1.8.18"});c.ui.plugin.add("resizable","alsoResize",{start:function(f,g){var e=c(this).data("resizable"),i=e.options;var h=function(j){c(j).each(function(){var 
k=c(this);k.data("resizable-alsoresize",{width:parseInt(k.width(),10),height:parseInt(k.height(),10),left:parseInt(k.css("left"),10),top:parseInt(k.css("top"),10)})})};if(typeof(i.alsoResize)=="object"&&!i.alsoResize.parentNode){if(i.alsoResize.length){i.alsoResize=i.alsoResize[0];h(i.alsoResize)}else{c.each(i.alsoResize,function(j){h(j)})}}else{h(i.alsoResize)}},resize:function(g,i){var f=c(this).data("resizable"),j=f.options,h=f.originalSize,l=f.originalPosition;var k={height:(f.size.height-h.height)||0,width:(f.size.width-h.width)||0,top:(f.position.top-l.top)||0,left:(f.position.left-l.left)||0},e=function(m,n){c(m).each(function(){var q=c(this),r=c(this).data("resizable-alsoresize"),p={},o=n&&n.length?n:q.parents(i.originalElement[0]).length?["width","height"]:["width","height","top","left"];c.each(o,function(s,u){var t=(r[u]||0)+(k[u]||0);if(t&&t>=0){p[u]=t||null}});q.css(p)})};if(typeof(j.alsoResize)=="object"&&!j.alsoResize.nodeType){c.each(j.alsoResize,function(m,n){e(m,n)})}else{e(j.alsoResize)}},stop:function(e,f){c(this).removeData("resizable-alsoresize")}});c.ui.plugin.add("resizable","animate",{stop:function(i,n){var p=c(this).data("resizable"),j=p.options;var h=p._proportionallyResizeElements,e=h.length&&(/textarea/i).test(h[0].nodeName),f=e&&c.ui.hasScroll(h[0],"left")?0:p.sizeDiff.height,l=e?0:p.sizeDiff.width;var g={width:(p.size.width-l),height:(p.size.height-f)},k=(parseInt(p.element.css("left"),10)+(p.position.left-p.originalPosition.left))||null,m=(parseInt(p.element.css("top"),10)+(p.position.top-p.originalPosition.top))||null;p.element.animate(c.extend(g,m&&k?{top:m,left:k}:{}),{duration:j.animateDuration,easing:j.animateEasing,step:function(){var 
o={width:parseInt(p.element.css("width"),10),height:parseInt(p.element.css("height"),10),top:parseInt(p.element.css("top"),10),left:parseInt(p.element.css("left"),10)};if(h&&h.length){c(h[0]).css({width:o.width,height:o.height})}p._updateCache(o);p._propagate("resize",i)}})}});c.ui.plugin.add("resizable","containment",{start:function(f,r){var t=c(this).data("resizable"),j=t.options,l=t.element;var g=j.containment,k=(g instanceof c)?g.get(0):(/parent/.test(g))?l.parent().get(0):g;if(!k){return}t.containerElement=c(k);if(/document/.test(g)||g==document){t.containerOffset={left:0,top:0};t.containerPosition={left:0,top:0};t.parentData={element:c(document),left:0,top:0,width:c(document).width(),height:c(document).height()||document.body.parentNode.scrollHeight}}else{var n=c(k),i=[];c(["Top","Right","Left","Bottom"]).each(function(p,o){i[p]=b(n.css("padding"+o))});t.containerOffset=n.offset();t.containerPosition=n.position();t.containerSize={height:(n.innerHeight()-i[3]),width:(n.innerWidth()-i[1])};var q=t.containerOffset,e=t.containerSize.height,m=t.containerSize.width,h=(c.ui.hasScroll(k,"left")?k.scrollWidth:m),s=(c.ui.hasScroll(k)?k.scrollHeight:e);t.parentData={element:k,left:q.left,top:q.top,width:h,height:s}}},resize:function(g,q){var t=c(this).data("resizable"),i=t.options,f=t.containerSize,p=t.containerOffset,m=t.size,n=t.position,r=t._aspectRatio||g.shiftKey,e={top:0,left:0},h=t.containerElement;if(h[0]!=document&&(/static/).test(h.css("position"))){e=p}if(n.left<(t._helper?p.left:0)){t.size.width=t.size.width+(t._helper?(t.position.left-p.left):(t.position.left-e.left));if(r){t.size.height=t.size.width/i.aspectRatio}t.position.left=i.helper?p.left:0}if(n.top<(t._helper?p.top:0)){t.size.height=t.size.height+(t._helper?(t.position.top-p.top):t.position.top);if(r){t.size.width=t.size.height*i.aspectRatio}t.position.top=t._helper?p.top:0}t.offset.left=t.parentData.left+t.position.left;t.offset.top=t.parentData.top+t.position.top;var 
l=Math.abs((t._helper?t.offset.left-e.left:(t.offset.left-e.left))+t.sizeDiff.width),s=Math.abs((t._helper?t.offset.top-e.top:(t.offset.top-p.top))+t.sizeDiff.height);var k=t.containerElement.get(0)==t.element.parent().get(0),j=/relative|absolute/.test(t.containerElement.css("position"));if(k&&j){l-=t.parentData.left}if(l+t.size.width>=t.parentData.width){t.size.width=t.parentData.width-l;if(r){t.size.height=t.size.width/t.aspectRatio}}if(s+t.size.height>=t.parentData.height){t.size.height=t.parentData.height-s;if(r){t.size.width=t.size.height*t.aspectRatio}}},stop:function(f,n){var q=c(this).data("resizable"),g=q.options,l=q.position,m=q.containerOffset,e=q.containerPosition,i=q.containerElement;var j=c(q.helper),r=j.offset(),p=j.outerWidth()-q.sizeDiff.width,k=j.outerHeight()-q.sizeDiff.height;if(q._helper&&!g.animate&&(/relative/).test(i.css("position"))){c(this).css({left:r.left-e.left-m.left,width:p,height:k})}if(q._helper&&!g.animate&&(/static/).test(i.css("position"))){c(this).css({left:r.left-e.left-m.left,width:p,height:k})}}});c.ui.plugin.add("resizable","ghost",{start:function(g,h){var e=c(this).data("resizable"),i=e.options,f=e.size;e.ghost=e.originalElement.clone();e.ghost.css({opacity:0.25,display:"block",position:"relative",height:f.height,width:f.width,margin:0,left:0,top:0}).addClass("ui-resizable-ghost").addClass(typeof i.ghost=="string"?i.ghost:"");e.ghost.appendTo(e.helper)},resize:function(f,g){var e=c(this).data("resizable"),h=e.options;if(e.ghost){e.ghost.css({position:"relative",height:e.size.height,width:e.size.width})}},stop:function(f,g){var e=c(this).data("resizable"),h=e.options;if(e.ghost&&e.helper){e.helper.get(0).removeChild(e.ghost.get(0))}}});c.ui.plugin.add("resizable","grid",{resize:function(e,m){var p=c(this).data("resizable"),h=p.options,k=p.size,i=p.originalSize,j=p.originalPosition,n=p.axis,l=h._aspectRatio||e.shiftKey;h.grid=typeof h.grid=="number"?[h.grid,h.grid]:h.grid;var 
g=Math.round((k.width-i.width)/(h.grid[0]||1))*(h.grid[0]||1),f=Math.round((k.height-i.height)/(h.grid[1]||1))*(h.grid[1]||1);if(/^(se|s|e)$/.test(n)){p.size.width=i.width+g;p.size.height=i.height+f}else{if(/^(ne)$/.test(n)){p.size.width=i.width+g;p.size.height=i.height+f;p.position.top=j.top-f}else{if(/^(sw)$/.test(n)){p.size.width=i.width+g;p.size.height=i.height+f;p.position.left=j.left-g}else{p.size.width=i.width+g;p.size.height=i.height+f;p.position.top=j.top-f;p.position.left=j.left-g}}}}});var b=function(e){return parseInt(e,10)||0};var a=function(e){return !isNaN(parseInt(e,10))}})(jQuery);/*! + * jQuery hashchange event - v1.3 - 7/21/2010 + * http://benalman.com/projects/jquery-hashchange-plugin/ + * + * Copyright (c) 2010 "Cowboy" Ben Alman + * Dual licensed under the MIT and GPL licenses. + * http://benalman.com/about/license/ + */ +(function($,e,b){var c="hashchange",h=document,f,g=$.event.special,i=h.documentMode,d="on"+c in e&&(i===b||i>7);function a(j){j=j||location.href;return"#"+j.replace(/^[^#]*#?(.*)$/,"$1")}$.fn[c]=function(j){return j?this.bind(c,j):this.trigger(c)};$.fn[c].delay=50;g[c]=$.extend(g[c],{setup:function(){if(d){return false}$(f.start)},teardown:function(){if(d){return false}$(f.stop)}});f=(function(){var j={},p,m=a(),k=function(q){return q},l=k,o=k;j.start=function(){p||n()};j.stop=function(){p&&clearTimeout(p);p=b};function n(){var r=a(),q=o(m);if(r!==m){l(m=r,q);$(e).trigger(c)}else{if(q!==m){location.href=location.href.replace(/#.*/,"")+q}}p=setTimeout(n,$.fn[c].delay)}$.browser.msie&&!d&&(function(){var q,r;j.start=function(){if(!q){r=$.fn[c].src;r=r&&r+a();q=$(' + + + + +
+ +
+
linear_scaling.h File Reference
+
+
+ +

Implements the BLAS linear scaling function alpha*AB + beta*C. +More...

+ +

Go to the source code of this file.

+
+ + + + + + + +

+Classes

struct  cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >
 Functor to compute linear combination of fragments. More...
 
struct  cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params
 The parameters. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+ + + + + diff --git a/docs/generated-html/linear__scaling_8h_source.html b/docs/generated-html/linear__scaling_8h_source.html new file mode 100644 index 00000000..d9817ed0 --- /dev/null +++ b/docs/generated-html/linear__scaling_8h_source.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: linear_scaling.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
linear_scaling.h
+
+
+Go to the documentation of this file.
1 
2 /***************************************************************************************************
3  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without modification, are permitted
6  * provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright notice, this list of
8  * conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright notice, this list of
10  * conditions and the following disclaimer in the documentation and/or other materials
11  * provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
13  * to endorse or promote products derived from this software without specific prior written
14  * permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
18  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  **************************************************************************************************/
29 #pragma once
30 
32 
33 namespace cutlass {
34 namespace gemm {
35 
37 
39 template <typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_> >
40 struct LinearScaling {
41  // The scalar.
42  typedef Scalar_ Scalar;
43  // The adapater.
44  typedef FragmentMultiplyAdd_ FragmentMultiplyAdd;
45 
47  struct Params {
50 
52  template <typename GemmDesc_>
53  CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) {
54  alpha = desc.alpha;
55  beta = desc.beta;
56  return 0;
57  }
58  };
59 
61  CUTLASS_DEVICE LinearScaling(Params const& params) : alpha(params.alpha), beta(params.beta) {}
62 
64  template <typename Fragment_>
65  CUTLASS_DEVICE void evaluate(Fragment_ const& accum, Fragment_& output) {
67  mad.multiply(alpha, accum, output);
68  }
69 
71  template <typename Fragment_>
72  CUTLASS_DEVICE void evaluate(Fragment_ const& accum, Fragment_ const& old, Fragment_& output) {
74  Fragment_ tmp;
75  mad.multiply(beta, old, tmp);
76  mad.multiply_add(alpha, accum, tmp, output);
77  }
78 
81 };
82 
84 
85 } // namespace gemm
86 } // namespace cutlass
Definition: convert.h:33
+
Scalar alpha
The alpha/beta scaling params.
Definition: linear_scaling.h:49
+
Scalar alpha
The alpha/beta scaling factors.
Definition: linear_scaling.h:80
+
CUTLASS_DEVICE LinearScaling(Params const &params)
Ctor.
Definition: linear_scaling.h:61
+
CUTLASS_DEVICE void evaluate(Fragment_ const &accum, Fragment_ const &old, Fragment_ &output)
Evaluate the functor.
Definition: linear_scaling.h:72
+
Scalar beta
Definition: linear_scaling.h:49
+
CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc)
Initialize the parameters.
Definition: linear_scaling.h:53
+
Scalar beta
Definition: linear_scaling.h:80
+
Defines multiply-add operations on fragments within a thread.
+
FragmentMultiplyAdd_ FragmentMultiplyAdd
Definition: linear_scaling.h:44
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
CUTLASS_DEVICE void evaluate(Fragment_ const &accum, Fragment_ &output)
Evaluate the functor.
Definition: linear_scaling.h:65
+
The parameters.
Definition: linear_scaling.h:47
+
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:40
+
Scalar_ Scalar
Definition: linear_scaling.h:42
+
+ + + + diff --git a/docs/generated-html/load__store_8h.html b/docs/generated-html/load__store_8h.html new file mode 100644 index 00000000..b23ec3cb --- /dev/null +++ b/docs/generated-html/load__store_8h.html @@ -0,0 +1,128 @@ + + + + + + + +Cutlass: load_store.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
load_store.h File Reference
+
+ + + + + diff --git a/docs/generated-html/load__store_8h_source.html b/docs/generated-html/load__store_8h_source.html new file mode 100644 index 00000000..e421cbf2 --- /dev/null +++ b/docs/generated-html/load__store_8h_source.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: load_store.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
load_store.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/vector.h>
31 
32 namespace cutlass {
33 
35 
39 struct MemorySpace {
40  enum Kind {
41  kGeneric, // Data accessed through pointer dereferencing
42  kShared, // Data resides in shared memory
43  kGlobal // Data resides in global memory
44  };
45 };
46 
48 
49 template <typename Scalar_,
50  int Lanes_,
51  MemorySpace::Kind Memory_,
52  bool = (Lanes_ > 1),
53  size_t = (sizeof(Scalar_) * Lanes_)>
54 struct Load {
57 
59  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
60  dst = reinterpret_cast<AccessType const*>(&pointer[offset])[0];
61  }
62 };
63 
65 
66 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
67 struct Load<Scalar_, Lanes_, Memory_, true, 4> {
70 
72  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
73  dst.registers[0] = reinterpret_cast<uint32_t const*>(&pointer[offset])[0];
74  }
75 };
76 
78 
79 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
80 struct Load<Scalar_, Lanes_, Memory_, true, 8> {
83 
85  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
86  uint2 tmp = reinterpret_cast<uint2 const*>(&pointer[offset])[0];
87  dst.registers[0] = tmp.x;
88  dst.registers[1] = tmp.y;
89  }
90 };
91 
93 
94 template <MemorySpace::Kind Memory_>
95 struct Load<double, 2, Memory_, true, 16> {
98 
100  static CUTLASS_DEVICE void load(AccessType& dst, double const* pointer, int offset) {
101  double2 tmp = reinterpret_cast<double2 const*>(&pointer[offset])[0];
102  dst[0] = tmp.x;
103  dst[1] = tmp.y;
104  }
105 };
106 
108 
109 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
110 struct Load<Scalar_, Lanes_, Memory_, true, 16> {
113 
115  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
116  uint4 tmp = reinterpret_cast<uint4 const*>(&pointer[offset])[0];
117  dst.registers[0] = tmp.x;
118  dst.registers[1] = tmp.y;
119  dst.registers[2] = tmp.z;
120  dst.registers[3] = tmp.w;
121  }
122 };
123 
125 
126 template <typename Scalar_,
127  int Lanes_,
128  MemorySpace::Kind Memory_,
129  bool = (Lanes_ > 1),
130  size_t = (sizeof(Scalar_) * Lanes_)>
131 struct Store {
134 
136  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
137  pointer[offset] = src;
138  }
139 };
140 
142 
143 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
144 struct Store<Scalar_, Lanes_, Memory_, true, 4> {
147 
149  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
150  uint32_t* addr = reinterpret_cast<uint32_t*>(&pointer[offset]);
151  addr[0] = src.registers[0];
152  }
153 };
154 
156 
157 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
158 struct Store<Scalar_, Lanes_, Memory_, true, 8> {
161 
163  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
164  uint2* addr = reinterpret_cast<uint2*>(&pointer[offset]);
165  addr[0] = make_uint2(src.registers[0], src.registers[1]);
166  }
167 };
168 
170 
171 template <MemorySpace::Kind Memory_>
172 struct Store<double, 2, Memory_, true, 16> {
175 
177  static CUTLASS_DEVICE void store(AccessType const& src, double* pointer, int offset) {
178  double2* addr = reinterpret_cast<double2*>(&pointer[offset]);
179  addr[0] = make_double2(src[0], src[1]);
180  }
181 };
182 
184 
185 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
186 struct Store<Scalar_, Lanes_, Memory_, true, 16> {
189 
191  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
192  uint4* addr = reinterpret_cast<uint4*>(&pointer[offset]);
193  addr[0] = make_uint4(src.registers[0], src.registers[1], src.registers[2], src.registers[3]);
194  }
195 };
196 
198 
199 } // namespace cutlass
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:188
+
Definition: load_store.h:42
+
Definition: convert.h:33
+
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:163
+
Enum to specify which memory space data resides in.
Definition: load_store.h:39
+
Definition: load_store.h:43
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:59
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:112
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:146
+
Kind
Definition: load_store.h:40
+
Definition: load_store.h:131
+
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:136
+
uint32_t registers[kRegisters]
The data in registers.
Definition: vector.h:80
+
Vectorize< double, 2 >::Type AccessType
The output type.
Definition: load_store.h:174
+
Definition: load_store.h:41
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The store function.
Definition: load_store.h:72
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:133
+
Definition: vector.h:61
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The store function.
Definition: load_store.h:85
+
Definition: load_store.h:54
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:82
+
Defines a 1D vector of elements held in the registers of each thread.
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:160
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The store function.
Definition: load_store.h:115
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:69
+
static CUTLASS_DEVICE void load(AccessType &dst, double const *pointer, int offset)
The store function.
Definition: load_store.h:100
+
Vectorize< double, 2 >::Type AccessType
The output type.
Definition: load_store.h:97
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:56
+
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:191
+
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:149
+
static CUTLASS_DEVICE void store(AccessType const &src, double *pointer, int offset)
The store function.
Definition: load_store.h:177
+
+ + + + diff --git a/docs/generated-html/matrix__traits_8h.html b/docs/generated-html/matrix__traits_8h.html new file mode 100644 index 00000000..f83c89f0 --- /dev/null +++ b/docs/generated-html/matrix__traits_8h.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: matrix_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
matrix_traits.h File Reference
+
+
+ +

Defines properties of matrices used to denote layout and operands to GEMM kernels. +More...

+ +

Go to the source code of this file.

+ + + + + + + + +

+Classes

struct  cutlass::MatrixLayout
 Describes layouts of matrices. More...
 
struct  cutlass::GemmOperand
 Gemm operand - D = A * B + C. More...
 
+ + + +

+Namespaces

 cutlass
 
+
+ + + + diff --git a/docs/generated-html/matrix__traits_8h_source.html b/docs/generated-html/matrix__traits_8h_source.html new file mode 100644 index 00000000..9f8de2dc --- /dev/null +++ b/docs/generated-html/matrix__traits_8h_source.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: matrix_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
matrix_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 namespace cutlass {
31 
33 
35 struct MatrixLayout {
37 };
38 
40 
42 struct GemmOperand {
43  enum Kind { kA, kB, kC, kD };
44 };
45 
47 
48 } // namespace cutlass
Definition: convert.h:33
+
Definition: matrix_traits.h:43
+
Describes layouts of matrices.
Definition: matrix_traits.h:35
+
Definition: matrix_traits.h:36
+
Definition: matrix_traits.h:43
+
Gemm operand - D = A * B + C.
Definition: matrix_traits.h:42
+
Definition: matrix_traits.h:36
+
Kind
Definition: matrix_traits.h:36
+
Kind
Definition: matrix_traits.h:43
+
Definition: matrix_traits.h:43
+
Definition: matrix_traits.h:43
+
+ + + + diff --git a/docs/generated-html/menu.js b/docs/generated-html/menu.js new file mode 100644 index 00000000..89aaf575 --- /dev/null +++ b/docs/generated-html/menu.js @@ -0,0 +1,50 @@ +/* + @licstart The following is the entire license notice for the + JavaScript code in this file. + + Copyright (C) 1997-2017 by Dimitri van Heesch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + @licend The above is the entire license notice + for the JavaScript code in this file + */ +function initMenu(relPath,searchEnabled,serverSide,searchPage,search) { + function makeTree(data,relPath) { + var result=''; + if ('children' in data) { + result+=''; + } + return result; + } + + $('#main-nav').append(makeTree(menudata,relPath)); + $('#main-nav').children(':first').addClass('sm sm-dox').attr('id','main-menu'); + if (searchEnabled) { + if (serverSide) { + $('#main-menu').append('
  • '); + } else { + $('#main-menu').append('
  • '); + } + } + $('#main-menu').smartmenus(); +} +/* @license-end */ diff --git a/docs/generated-html/menudata.js b/docs/generated-html/menudata.js new file mode 100644 index 00000000..725988aa --- /dev/null +++ b/docs/generated-html/menudata.js @@ -0,0 +1,151 @@ +/* +@ @licstart The following is the entire license notice for the +JavaScript code in this file. + +Copyright (C) 1997-2017 by Dimitri van Heesch + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +@licend The above is the entire license notice +for the JavaScript code in this file +*/ +var menudata={children:[ +{text:"Main Page",url:"index.html"}, +{text:"Modules",url:"modules.html"}, +{text:"Namespaces",url:"namespaces.html",children:[ +{text:"Namespace List",url:"namespaces.html"}, +{text:"Namespace Members",url:"namespacemembers.html",children:[ +{text:"All",url:"namespacemembers.html",children:[ +{text:"_",url:"namespacemembers.html#index__"}, +{text:"c",url:"namespacemembers.html#index_c"}, +{text:"f",url:"namespacemembers.html#index_f"}, +{text:"g",url:"namespacemembers.html#index_g"}, +{text:"i",url:"namespacemembers.html#index_i"}, +{text:"l",url:"namespacemembers.html#index_l"}, +{text:"m",url:"namespacemembers.html#index_m"}, +{text:"o",url:"namespacemembers.html#index_o"}, +{text:"r",url:"namespacemembers.html#index_r"}, +{text:"s",url:"namespacemembers.html#index_s"}, +{text:"t",url:"namespacemembers.html#index_t"}]}, +{text:"Functions",url:"namespacemembers_func.html",children:[ +{text:"_",url:"namespacemembers_func.html#index__"}, +{text:"c",url:"namespacemembers_func.html#index_c"}, +{text:"g",url:"namespacemembers_func.html#index_g"}, +{text:"i",url:"namespacemembers_func.html#index_i"}, +{text:"l",url:"namespacemembers_func.html#index_l"}, +{text:"m",url:"namespacemembers_func.html#index_m"}, +{text:"o",url:"namespacemembers_func.html#index_o"}, +{text:"r",url:"namespacemembers_func.html#index_r"}, +{text:"s",url:"namespacemembers_func.html#index_s"}]}, +{text:"Typedefs",url:"namespacemembers_type.html"}]}]}, +{text:"Classes",url:"annotated.html",children:[ +{text:"Class List",url:"annotated.html"}, +{text:"Class Index",url:"classes.html"}, +{text:"Class Hierarchy",url:"hierarchy.html"}, +{text:"Class Members",url:"functions.html",children:[ +{text:"All",url:"functions.html",children:[ +{text:"a",url:"functions.html#index_a"}, +{text:"b",url:"functions_b.html#index_b"}, +{text:"c",url:"functions_c.html#index_c"}, 
+{text:"d",url:"functions_d.html#index_d"}, +{text:"e",url:"functions_e.html#index_e"}, +{text:"f",url:"functions_f.html#index_f"}, +{text:"g",url:"functions_g.html#index_g"}, +{text:"h",url:"functions_h.html#index_h"}, +{text:"i",url:"functions_i.html#index_i"}, +{text:"k",url:"functions_k.html#index_k"}, +{text:"l",url:"functions_l.html#index_l"}, +{text:"m",url:"functions_m.html#index_m"}, +{text:"n",url:"functions_n.html#index_n"}, +{text:"o",url:"functions_o.html#index_o"}, +{text:"p",url:"functions_p.html#index_p"}, +{text:"r",url:"functions_r.html#index_r"}, +{text:"s",url:"functions_s.html#index_s"}, +{text:"t",url:"functions_t.html#index_t"}, +{text:"u",url:"functions_u.html#index_u"}, +{text:"v",url:"functions_v.html#index_v"}, +{text:"w",url:"functions_w.html#index_w"}, +{text:"y",url:"functions_y.html#index_y"}, +{text:"~",url:"functions_0x7e.html#index_0x7e"}]}, +{text:"Functions",url:"functions_func.html",children:[ +{text:"a",url:"functions_func.html#index_a"}, +{text:"b",url:"functions_func_b.html#index_b"}, +{text:"c",url:"functions_func_c.html#index_c"}, +{text:"d",url:"functions_func_d.html#index_d"}, +{text:"e",url:"functions_func_e.html#index_e"}, +{text:"f",url:"functions_func_f.html#index_f"}, +{text:"g",url:"functions_func_g.html#index_g"}, +{text:"h",url:"functions_func_h.html#index_h"}, +{text:"i",url:"functions_func_i.html#index_i"}, +{text:"l",url:"functions_func_l.html#index_l"}, +{text:"m",url:"functions_func_m.html#index_m"}, +{text:"o",url:"functions_func_o.html#index_o"}, +{text:"p",url:"functions_func_p.html#index_p"}, +{text:"r",url:"functions_func_r.html#index_r"}, +{text:"s",url:"functions_func_s.html#index_s"}, +{text:"t",url:"functions_func_t.html#index_t"}, +{text:"u",url:"functions_func_u.html#index_u"}, +{text:"v",url:"functions_func_v.html#index_v"}, +{text:"w",url:"functions_func_w.html#index_w"}, +{text:"~",url:"functions_func_0x7e.html#index_0x7e"}]}, +{text:"Variables",url:"functions_vars.html",children:[ 
+{text:"a",url:"functions_vars.html#index_a"}, +{text:"b",url:"functions_vars_b.html#index_b"}, +{text:"c",url:"functions_vars_c.html#index_c"}, +{text:"d",url:"functions_vars_d.html#index_d"}, +{text:"e",url:"functions_vars_e.html#index_e"}, +{text:"f",url:"functions_vars_f.html#index_f"}, +{text:"g",url:"functions_vars_g.html#index_g"}, +{text:"i",url:"functions_vars_i.html#index_i"}, +{text:"k",url:"functions_vars_k.html#index_k"}, +{text:"l",url:"functions_vars_l.html#index_l"}, +{text:"m",url:"functions_vars_m.html#index_m"}, +{text:"n",url:"functions_vars_n.html#index_n"}, +{text:"p",url:"functions_vars_p.html#index_p"}, +{text:"r",url:"functions_vars_r.html#index_r"}, +{text:"s",url:"functions_vars_s.html#index_s"}, +{text:"t",url:"functions_vars_t.html#index_t"}, +{text:"v",url:"functions_vars_v.html#index_v"}]}, +{text:"Typedefs",url:"functions_type.html",children:[ +{text:"a",url:"functions_type.html#index_a"}, +{text:"b",url:"functions_type_b.html#index_b"}, +{text:"c",url:"functions_type_c.html#index_c"}, +{text:"d",url:"functions_type_d.html#index_d"}, +{text:"e",url:"functions_type_e.html#index_e"}, +{text:"f",url:"functions_type_f.html#index_f"}, +{text:"g",url:"functions_type_g.html#index_g"}, +{text:"i",url:"functions_type_i.html#index_i"}, +{text:"l",url:"functions_type_l.html#index_l"}, +{text:"m",url:"functions_type_m.html#index_m"}, +{text:"n",url:"functions_type_n.html#index_n"}, +{text:"o",url:"functions_type_o.html#index_o"}, +{text:"p",url:"functions_type_p.html#index_p"}, +{text:"s",url:"functions_type_s.html#index_s"}, +{text:"t",url:"functions_type_t.html#index_t"}, +{text:"v",url:"functions_type_v.html#index_v"}, +{text:"w",url:"functions_type_w.html#index_w"}, +{text:"y",url:"functions_type_y.html#index_y"}]}, +{text:"Enumerations",url:"functions_enum.html"}, +{text:"Enumerator",url:"functions_eval.html",children:[ +{text:"a",url:"functions_eval.html#index_a"}, +{text:"k",url:"functions_eval.html#index_k"}, 
+{text:"m",url:"functions_eval.html#index_m"}, +{text:"v",url:"functions_eval.html#index_v"}]}]}]}, +{text:"Files",url:"files.html",children:[ +{text:"File List",url:"files.html"}, +{text:"File Members",url:"globals.html",children:[ +{text:"All",url:"globals.html"}, +{text:"Functions",url:"globals_func.html"}, +{text:"Macros",url:"globals_defs.html"}]}]}]} diff --git a/docs/generated-html/modules.html b/docs/generated-html/modules.html new file mode 100644 index 00000000..c42247bd --- /dev/null +++ b/docs/generated-html/modules.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Modules + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    +
    Modules
    +
    + + + + + diff --git a/docs/generated-html/namespacecutlass.html b/docs/generated-html/namespacecutlass.html new file mode 100644 index 00000000..989135cb --- /dev/null +++ b/docs/generated-html/namespacecutlass.html @@ -0,0 +1,1557 @@ + + + + + + + +Cutlass: cutlass Namespace Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + +
    +
    + +
    +
    cutlass Namespace Reference
    +
    +
    + + + + + + +

    +Namespaces

     gemm
     
     platform
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  AlignedStruct
     
    struct  ComputeOffsetFromShape
     Compute the offset for the given coordinates in a cube. More...
     
    struct  ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >
     Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
     
    struct  ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >
     Compute the offset for the given coordinates in a cube with a depth of 1. More...
     
    struct  ComputeOffsetFromStrides
     Compute the offset for the given coordinates in a cube. More...
     
    struct  ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >
     Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
     
    struct  ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >
     Compute the offset for the given coordinates in a cube with a depth of 1. More...
     
    struct  ComputeThreadOffsetFromStrides
     Decompose threadId.x into coordinate of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_. More...
     
    struct  ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >
     Specialization for D=1 and C=1. More...
     
    struct  ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >
     Specialization for D=1. More...
     
    struct  ConstPredicateTileAdapter
     Adapter to enable random access to predicates via logical coordinate within a tile. More...
     
    struct  Convert
     
    struct  Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >
     
    struct  Coord
     Statically-sized array specifying Coords within a tensor. More...
     
    struct  Copy
     
    struct  divide_assert
     
    struct  Extent
     Returns the extent of a scalar or vector. More...
     
    struct  Extent< Vector< T, Lanes > >
     Returns the number of lanes of a vector if need be. More...
     
    struct  Extent< Vector< T, Lanes > const >
     Returns the number of lanes of a vector if need be. More...
     
    struct  Fragment
     A template defining Fragment Concept. More...
     
    struct  FragmentConstIterator
     
    struct  FragmentIterator
     A template defining Fragment Iterator Concept. More...
     
    struct  FragmentLoad
     
    struct  FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  FragmentStore
     
    struct  FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  GemmOperand
     Gemm operand - D = A * B + C. More...
     
    struct  Identity
     Describes identity elements. More...
     
    struct  is_pow2
     
    struct  IteratorAdvance
     Specifies dimension in which post-increment accesses advance. More...
     
    struct  IteratorFragment
     Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. More...
     
    struct  Load
     
    struct  Load< double, 2, Memory_, true, 16 >
     
    struct  Load< Scalar_, Lanes_, Memory_, true, 16 >
     
    struct  Load< Scalar_, Lanes_, Memory_, true, 4 >
     
    struct  Load< Scalar_, Lanes_, Memory_, true, 8 >
     
    struct  log2_down
     
    struct  log2_down< N, 1, Count >
     
    struct  log2_up
     
    struct  log2_up< N, 1, Count >
     
    struct  MatrixLayout
     Describes layouts of matrices. More...
     
    struct  MemorySpace
     Enum to specify which memory space data resides in. More...
     
    struct  PredicateTileAdapter
     Adapter to enable random access to predicates via logical coordinate within a tile. More...
     
    struct  PredicateVector
     Statically sized array of bits implementing Predicate Vector Concept. More...
     
    struct  ReshapeTile
     
    struct  ReshapeTile< Tile_, kAccessSize_, true >
     
    struct  Shape
     A Shape implementing Layout Concept describing the dimensions of a cube. More...
     
    struct  ShapeAdd
     
    struct  ShapeCount
     Computes derived counts of a Layout Concept-based class. More...
     
    struct  ShapeDiv
     
    struct  ShapeMax
     
    struct  ShapeMin
     
    struct  ShapeMul
     
    struct  ShapeScale
     
    struct  ShapeStrides
     
    struct  ShapeSub
     
    struct  sqrt_est
     
    struct  StorageType
     
    struct  StorageType< 1 >
     
    struct  StorageType< 2 >
     
    struct  StorageType< 4 >
     
    struct  Store
     
    struct  Store< double, 2, Memory_, true, 16 >
     
    struct  Store< Scalar_, Lanes_, Memory_, true, 16 >
     
    struct  Store< Scalar_, Lanes_, Memory_, true, 4 >
     
    struct  Store< Scalar_, Lanes_, Memory_, true, 8 >
     
    class  TensorRef
     Structure modeling a pointer and stride into a tensor. More...
     
    class  TensorView
     Host-side reference implementation of tensor operations. More...
     
    struct  TiledThreadOffset
     Basic thread offset function computed from a thread shape. More...
     
    struct  TileIteratorBase
     Iterator for accessing a stripmined tile in memory. More...
     
    struct  TileLoadIterator
     An iterator implementing Tile Load Iterator Concept for loading a tile from memory. More...
     
    struct  TileStoreIterator
     An iterator implementing Tile Store Iterator Concept for storing a tile to memory. More...
     
    struct  TileTraits
     A template defining Tile Traits Concept. More...
     
    struct  TileTraitsContiguousMajor
     
    struct  TileTraitsStandard
     Chooses 'best' shape to enable warp raking along contiguous dimension if possible. More...
     
    struct  TileTraitsStrideMajor
     
    struct  TileTraitsWarpRake
     Tiling in which warps rake across the contiguous dimension. More...
     
    struct  TrivialPredicateTileAdapter
     Always returns true predicate. More...
     
    union  Vector
     
    union  Vector< half, kLanes_ >
     
    struct  Vectorize
     
    struct  Vectorize< Element_, 1 >
     
    struct  VectorTraits
     Traits describing properties of vectors and scalar-as-vectors. More...
     
    struct  VectorTraits< Vector< T, Lanes > >
     Partial specialization for actual cutlass::Vector. More...
     
    struct  VectorTraits< Vector< T, Lanes > const >
     Partial specialization for actual cutlass::Vector. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord (int _0)
     Helper to make a 1-element coordinate. More...
     
    CUTLASS_HOST_DEVICE Coord< 2 > make_Coord (int _0, int _1)
     Helper to make a 2-element coordinate. More...
     
    CUTLASS_HOST_DEVICE Coord< 3 > make_Coord (int _0, int _1, int _2)
     Helper to make a 3-element coordinate. More...
     
    CUTLASS_HOST_DEVICE Coord< 4 > make_Coord (int _0, int _1, int _2, int _3)
     Helper to make a 4-element coordinate. More...
     
    CUTLASS_HOST_DEVICE Coord< 2 > get_Coord_hw (Coord< 3 > const &coord)
     Getter. More...
     
    CUTLASS_HOST_DEVICE Coord< 2 > get_Coord_hw (Coord< 4 > const &coord)
     Getter. More...
     
    CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_hwc (Coord< 4 > const &coord)
     Getter. More...
     
    CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_dhw (Coord< 4 > const &coord)
     Getter. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator &iterator, Fragment &fragment)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_DEVICE void shared_iterator_load (InputIterator &iterator, Fragment &fragment)
     Loads a fragment from a shared memory input iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_DEVICE void shared_iterator_load (InputIterator &iterator, Fragment &fragment, int d)
     Loads a fragment from a shared memory input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Loads a fragment from an input iterator, masked by a predicate iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
     Loads a fragment from an input iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator &iterator, Fragment &fragment)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_DEVICE void shared_iterator_store (OutputIterator &iterator, Fragment const &fragment)
     Stores a fragment to a shared memory output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Stores a fragment to an output iterator, masked by a predicate iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Stores a fragment to an output iterator, masked by a predicate iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
     Stores a fragment to an output iterator. More...
     
    template<typename dividend_t , typename divisor_t >
    CUTLASS_HOST_DEVICE dividend_t round_nearest (dividend_t dividend, divisor_t divisor)
     
    template<typename value_t >
    CUTLASS_HOST_DEVICE value_t gcd (value_t a, value_t b)
     
    template<typename value_t >
    CUTLASS_HOST_DEVICE value_t lcm (value_t a, value_t b)
     
    __host__ CUTLASS_DEVICE cudaError_t cuda_perror_impl (cudaError_t error, const char *filename, int line)
     The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
     
    template<>
    struct __align__ (1) AlignedStruct< 1 >
     
    template<>
    struct __align__ (2) AlignedStruct< 2 >
     
    template<>
    struct __align__ (4) AlignedStruct< 4 >
     
    template<>
    struct __align__ (8) AlignedStruct< 8 >
     
    template<>
    struct __align__ (16) AlignedStruct< 16 >
     
    template<>
    struct __align__ (32) AlignedStruct< 32 >
     
    template<>
    struct __align__ (64) AlignedStruct< 64 >
     
    template<typename Scalar_ >
    CUTLASS_DEVICE void make_zero (Scalar_ &x)
     
    template<typename Scalar_ , int kLanes_>
    CUTLASS_DEVICE void make_zero (Vector< Scalar_, kLanes_ > &vec)
     
    +

    Function Documentation

    + +

    ◆ __align__() [1/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [2/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [3/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [4/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ (16 )
    +
    + +
    +
    + +

    ◆ __align__() [5/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ (32 )
    +
    + +
    +
    + +

    ◆ __align__() [6/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ (64 )
    +
    + +
    +
    + +

    ◆ __align__() [7/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ ()
    +
    + +
    +
    + +

    ◆ cuda_perror_impl()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    __host__ CUTLASS_DEVICE cudaError_t cutlass::cuda_perror_impl (cudaError_t error,
    const char * filename,
    int line 
    )
    +
    +
    Returns
    The CUDA error.
    + +
    +
    + +

    ◆ gcd()

    + +
    +
    +
    +template<typename value_t >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE value_t cutlass::gcd (value_t a,
    value_t b 
    )
    +
    +

    Greatest common divisor

    + +
    +
    + +

    ◆ get_Coord_dhw()

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<3> cutlass::get_Coord_dhw (Coord< 4 > const & coord)
    +
    + +
    +
    + +

    ◆ get_Coord_hw() [1/2]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<2> cutlass::get_Coord_hw (Coord< 3 > const & coord)
    +
    + +
    +
    + +

    ◆ get_Coord_hw() [2/2]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<2> cutlass::get_Coord_hw (Coord< 4 > const & coord)
    +
    + +
    +
    + +

    ◆ get_Coord_hwc()

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<3> cutlass::get_Coord_hwc (Coord< 4 > const & coord)
    +
    + +
    +
    + +

    ◆ iterator_load() [1/4]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator & iterator,
    Fragment & fragment 
    )
    +
    + +
    +
    + +

    ◆ iterator_load() [2/4]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const & _iterator,
    Fragment & fragment,
    typename InputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    +
    + +
    +
    + +

    ◆ iterator_load() [3/4]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const & iterator,
    Fragment & fragment,
    typename InputIterator::Index offset = 0 
    )
    +
    + +
    +
    + +

    ◆ iterator_load() [4/4]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const & iterator,
    Fragment & fragment,
    ConstPredicateAdapter pred_it 
    )
    +
    + +
    +
    + +

    ◆ iterator_load_post_increment() [1/3]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator & iterator,
    Fragment & fragment,
    typename InputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    +
    + +
    +
    + +

    ◆ iterator_load_post_increment() [2/3]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator & iterator,
    Fragment & fragment,
    typename InputIterator::Index offset = 0 
    )
    +
    + +
    +
    + +

    ◆ iterator_load_post_increment() [3/3]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator & iterator,
    Fragment & fragment,
    ConstPredicateAdapter pred_it 
    )
    +
    + +
    +
    + +

    ◆ iterator_store() [1/4]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator & iterator,
    Fragment & fragment 
    )
    +
    + +
    +
    + +

    ◆ iterator_store() [2/4]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const & _iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    +
    + +
    +
    + +

    ◆ iterator_store() [3/4]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const & iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset = 0 
    )
    +
    + +
    +
    + +

    ◆ iterator_store() [4/4]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const & iterator,
    Fragment const & fragment,
    ConstPredicateAdapter pred_it 
    )
    +
    + +
    +
    + +

    ◆ iterator_store_post_increment() [1/3]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator & iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    +
    + +
    +
    + +

    ◆ iterator_store_post_increment() [2/3]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator & iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset = 0 
    )
    +
    + +
    +
    + +

    ◆ iterator_store_post_increment() [3/3]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator & iterator,
    Fragment const & fragment,
    ConstPredicateAdapter pred_it 
    )
    +
    + +
    +
    + +

    ◆ lcm()

    + +
    +
    +
    +template<typename value_t >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE value_t cutlass::lcm (value_t a,
    value_t b 
    )
    +
    +

    Least common multiple

    + +
    +
    + +

    ◆ make_Coord() [1/4]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<1> cutlass::make_Coord (int _0)
    +
    + +
    +
    + +

    ◆ make_Coord() [2/4]

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<2> cutlass::make_Coord (int _0,
    int _1 
    )
    +
    + +
    +
    + +

    ◆ make_Coord() [3/4]

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<3> cutlass::make_Coord (int _0,
    int _1,
    int _2 
    )
    +
    + +
    +
    + +

    ◆ make_Coord() [4/4]

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::make_Coord (int _0,
    int _1,
    int _2,
    int _3 
    )
    +
    + +
    +
    + +

    ◆ make_zero() [1/2]

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::make_zero (Scalar_ & x)
    +
    + +
    +
    + +

    ◆ make_zero() [2/2]

    + +
    +
    +
    +template<typename Scalar_ , int kLanes_>
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::make_zero (Vector< Scalar_, kLanes_ > & vec)
    +
    + +
    +
    + +

    ◆ round_nearest()

    + +
    +
    +
    +template<typename dividend_t , typename divisor_t >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE dividend_t cutlass::round_nearest (dividend_t dividend,
    divisor_t divisor 
    )
    +
    +

    Round dividend up to the nearest multiple of divisor

    + +
    +
    + +

    ◆ shared_iterator_load() [1/2]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator & iterator,
    Fragment & fragment 
    )
    +
    + +
    +
    + +

    ◆ shared_iterator_load() [2/2]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator & iterator,
    Fragment & fragment,
    int d 
    )
    +
    + +
    +
    + +

    ◆ shared_iterator_store()

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::shared_iterator_store (OutputIterator & iterator,
    Fragment const & fragment 
    )
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/namespacecutlass_1_1gemm.html b/docs/generated-html/namespacecutlass_1_1gemm.html new file mode 100644 index 00000000..1c84e448 --- /dev/null +++ b/docs/generated-html/namespacecutlass_1_1gemm.html @@ -0,0 +1,371 @@ + + + + + + + +Cutlass: cutlass::gemm Namespace Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm Namespace Reference
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  ClearAccumulators
     
    struct  DgemmConfig
     
    struct  DgemmTraits
     
    struct  FragmentMultiplyAdd
     
    struct  FragmentMultiplyAdd< half >
     
    struct  Gemm
     
    struct  GemmConfig
     
    struct  GemmDesc
     
    struct  GemmEpilogue
     
    struct  GemmEpilogueTraits
     
    struct  GemmEpilogueTraitsHelper
     
    struct  GemmGlobalIteratorAb
     
    struct  GemmGlobalIteratorCd
     
    struct  GemmGlobalTileCdTraits
     
    struct  GemmGlobalTileTraits
     
    struct  GemmMultiplicandTraits
     
    struct  GemmOperandTraitsAb
     Helper to describe attributes of GEMM matrix operands. More...
     
    struct  GemmSharedLoadTileATraits
     
    struct  GemmSharedLoadTileBTraits
     
    struct  GemmSharedLoadTileDTraits
     
    struct  GemmSharedStoreTileAbTraits
     
    struct  GemmSharedStoreTileDTraits
     
    struct  GemmSharedStoreWithSkewTileAbTraits
     
    struct  GemmTileTraitsHelperA
     
    struct  GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
     
    struct  GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
     
    struct  GemmTileTraitsHelperB
     
    struct  GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
     
    struct  GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
     
    struct  GemmTraits
     
    struct  GetExtent
     
    struct  GetExtent< GemmOperand::kA, Tile_ >
     
    struct  GetExtent< GemmOperand::kB, Tile_ >
     
    struct  GlobalLoadStream
     
    struct  GlobalLoadStreamBase
     
    struct  HgemmConfig
     
    struct  HgemmCrosswiseGlobalTileTraits
     
    struct  HgemmSwizzle
     
    struct  HgemmTileTraitsHelperA
     
    struct  HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
     
    struct  HgemmTileTraitsHelperB
     
    struct  HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
     
    struct  HgemmTraits
     
    struct  HgemmTraitsHelper
     
    struct  HgemmTransformerA
     
    struct  HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
     
    struct  HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
     
    struct  HgemmTransformerB
     
    struct  HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
     
    struct  HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
     
    struct  IdentityBlockSwizzle
     
    struct  IgemmConfig
     
    struct  IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >
     
    struct  IgemmContiguousGlobalTileTraits
     
    struct  IgemmEpilogue
     
    struct  IgemmEpilogue< GemmEpilogueTraits_, true >
     
    struct  IgemmEpilogueScalar
     
    struct  IgemmEpilogueScalar< int >
     
    struct  IgemmEpilogueTraits
     
    struct  IgemmEpilogueTraitsHelper
     
    struct  IgemmFloatToInt8Converter
     
    struct  IgemmGlobalLoadTransformer
     
    struct  IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >
     
    struct  IgemmGlobalStoreTransformer
     
    struct  IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >
     
    struct  IgemmInt8ToFloatConverter
     
    struct  IgemmSharedStoreTransformer
     
    struct  IgemmSwizzle
     
    struct  IgemmTileTraitsHelperA
     
    struct  IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
     
    struct  IgemmTileTraitsHelperB
     
    struct  IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
     
    struct  IgemmTraits
     
    struct  IgemmTraitsHelper
     
    struct  IgemmTransformerA
     
    struct  IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
     
    struct  IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
     
    struct  IgemmTransformerB
     
    struct  IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
     
    struct  IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
     
    struct  LinearScaling
     Functor to compute linear combination of fragments. More...
     
    struct  ProjectOperand
     
    struct  ProjectOperand< GemmOperand::kA, Kstrided >
     Project A operand - (0, K, M) More...
     
    struct  ProjectOperand< GemmOperand::kB, Kstrided >
     Project B operand - (0, K, N) More...
     
    struct  ProjectOperand< GemmOperand::kC, true >
     Project C operand - (0, N, M) More...
     
    struct  ProjectOperand< GemmOperand::kD, true >
     Project D operand - (0, N, M) More...
     
    struct  ReshapeThreads
     
    struct  ReshapeThreads< Tile_, Threads_, true >
     
    struct  SgemmConfig
     
    struct  SgemmTraits
     
    struct  SharedLoadStream
     
    struct  SimplifiedGemmEpilogueTraits
     
    struct  SimplifiedGemmTraits
     
    struct  SimplifiedGemmTraitsHelper
     
    struct  ThreadMultiplyAdd
     Template performing matrix multiply-add operation within a thread. More...
     
    struct  ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
     Template performing matrix multiply-add operation within a thread. More...
     
    struct  ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
     Template performing matrix multiply-add operation within a thread. More...
     
    struct  WmmaGemmGlobalIteratorCd
     
    struct  WmmaGemmGlobalIteratorCdTraits
     
    + + + + + + + + + +

    +Functions

    template<typename Gemm_ >
    __global__ void gemm_kernel (typename Gemm_::Params params)
     
    template<typename T >
    CUTLASS_DEVICE bool is_zero (T x)
     
    CUTLASS_DEVICE bool is_zero (half x)
     
    +

    Function Documentation

    + +

    ◆ gemm_kernel()

    + +
    +
    +
    +template<typename Gemm_ >
    + + + + + + + + +
    __global__ void cutlass::gemm::gemm_kernel (typename Gemm_::Params params)
    +
    + +
    +
    + +

    ◆ is_zero() [1/2]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::gemm::is_zero (T x)
    +
    + +
    +
    + +

    ◆ is_zero() [2/2]

    + +
    +
    + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::gemm::is_zero (half x)
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/namespacecutlass_1_1platform.html b/docs/generated-html/namespacecutlass_1_1platform.html new file mode 100644 index 00000000..2bf30c0d --- /dev/null +++ b/docs/generated-html/namespacecutlass_1_1platform.html @@ -0,0 +1,938 @@ + + + + + + + +Cutlass: cutlass::platform Namespace Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform Namespace Reference
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  aligned_chunk
     
    struct  aligned_storage
     std::aligned_storage More...
     
    struct  alignment_of
     std::alignment_of More...
     
    struct  alignment_of< const value_t >
     
    struct  alignment_of< const volatile value_t >
     
    struct  alignment_of< double2 >
     
    struct  alignment_of< double4 >
     
    struct  alignment_of< float4 >
     
    struct  alignment_of< int4 >
     
    struct  alignment_of< long4 >
     
    struct  alignment_of< longlong2 >
     
    struct  alignment_of< longlong4 >
     
    struct  alignment_of< uint4 >
     
    struct  alignment_of< ulong4 >
     
    struct  alignment_of< ulonglong2 >
     
    struct  alignment_of< ulonglong4 >
     
    struct  alignment_of< volatile value_t >
     
    struct  bool_constant
     std::bool_constant More...
     
    struct  conditional
     std::conditional (true specialization) More...
     
    struct  conditional< false, T, F >
     std::conditional (false specialization) More...
     
    struct  default_delete
     Default deleter. More...
     
    struct  default_delete< T[]>
     Partial specialization for deleting array types. More...
     
    struct  enable_if
     std::enable_if (true specialization) More...
     
    struct  enable_if< false, T >
     std::enable_if (false specialization) More...
     
    struct  greater
     std::greater More...
     
    struct  integral_constant
     std::integral_constant More...
     
    struct  is_arithmetic
     std::is_arithmetic More...
     
    struct  is_base_of
     std::is_base_of More...
     
    struct  is_base_of_helper
     Helper for std::is_base_of. More...
     
    struct  is_floating_point
     std::is_floating_point More...
     
    struct  is_fundamental
     std::is_fundamental More...
     
    struct  is_integral
     std::is_integral More...
     
    struct  is_integral< char >
     
    struct  is_integral< const T >
     
    struct  is_integral< const volatile T >
     
    struct  is_integral< int >
     
    struct  is_integral< long >
     
    struct  is_integral< long long >
     
    struct  is_integral< short >
     
    struct  is_integral< signed char >
     
    struct  is_integral< unsigned char >
     
    struct  is_integral< unsigned int >
     
    struct  is_integral< unsigned long >
     
    struct  is_integral< unsigned long long >
     
    struct  is_integral< unsigned short >
     
    struct  is_integral< volatile T >
     
    struct  is_pointer
     std::is_pointer More...
     
    struct  is_pointer_helper
     Helper for std::is_pointer (false specialization) More...
     
    struct  is_pointer_helper< T * >
     Helper for std::is_pointer (true specialization) More...
     
    struct  is_same
     std::is_same (false specialization) More...
     
    struct  is_same< A, A >
     std::is_same (true specialization) More...
     
    struct  is_trivially_copyable
     
    struct  is_void
     std::is_void More...
     
    struct  is_volatile
     std::is_volatile More...
     
    struct  is_volatile< volatile T >
     
    struct  less
     std::less More...
     
    struct  nullptr_t
     std::nullptr_t More...
     
    struct  plus
     platform::plus More...
     
    struct  remove_const
     std::remove_const (non-const specialization) More...
     
    struct  remove_const< const T >
     std::remove_const (const specialization) More...
     
    struct  remove_cv
     std::remove_cv More...
     
    struct  remove_volatile
     std::remove_volatile (non-volatile specialization) More...
     
    struct  remove_volatile< volatile T >
     std::remove_volatile (volatile specialization) More...
     
    class  unique_ptr
     std::unique_ptr More...
     
    + + + + + + + +

    +Typedefs

    typedef integral_constant< bool, true > true_type
     The type used as a compile-time boolean with true value. More...
     
    typedef integral_constant< bool, false > false_type
     The type used as a compile-time boolean with false value. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & min (const T &a, const T &b)
     std::min More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & max (const T &a, const T &b)
     std::max More...
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator== (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator!= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator< (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator<= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator> (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator>= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > make_pair (T1 t, T2 u)
     
    template<>
    struct __align__ (1) aligned_chunk< 1 >
     
    template<>
    struct __align__ (2) aligned_chunk< 2 >
     
    template<>
    struct __align__ (4) aligned_chunk< 4 >
     
    template<>
    struct __align__ (8) aligned_chunk< 8 >
     
    template<>
    struct __align__ (16) aligned_chunk< 16 >
     
    template<>
    struct __align__ (32) aligned_chunk< 32 >
     
    template<>
    struct __align__ (64) aligned_chunk< 64 >
     
    template<>
    struct __align__ (128) aligned_chunk< 128 >
     
    template<>
    struct __align__ (256) aligned_chunk< 256 >
     
    template<>
    struct __align__ (512) aligned_chunk< 512 >
     
    template<>
    struct __align__ (1024) aligned_chunk< 1024 >
     
    template<>
    struct __align__ (2048) aligned_chunk< 2048 >
     
    template<>
    struct __align__ (4096) aligned_chunk< 4096 >
     
    template<typename T , typename Deleter >
    void swap (unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
     Specializes the swap algorithm. More...
     
    +

    Typedef Documentation

    + +

    ◆ false_type

    + +
    +
    + + + + +
    typedef integral_constant<bool, false> cutlass::platform::false_type
    +
    + +
    +
    + +

    ◆ true_type

    + +
    +
    + + + + +
    typedef integral_constant<bool, true> cutlass::platform::true_type
    +
    + +
    +
    +

    Function Documentation

    + +

    ◆ __align__() [1/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [2/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [3/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (64 )
    +
    + +
    +
    + +

    ◆ __align__() [4/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (128 )
    +
    + +
    +
    + +

    ◆ __align__() [5/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (256 )
    +
    + +
    +
    + +

    ◆ __align__() [6/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (512 )
    +
    + +
    +
    + +

    ◆ __align__() [7/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (1024 )
    +
    + +
    +
    + +

    ◆ __align__() [8/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (2048 )
    +
    + +
    +
    + +

    ◆ __align__() [9/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (4096 )
    +
    + +
    +
    + +

    ◆ __align__() [10/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (32 )
    +
    + +
    +
    + +

    ◆ __align__() [11/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [12/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [13/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (16 )
    +
    + +
    +
    + +

    ◆ make_pair()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE std::pair<T1, T2> cutlass::platform::make_pair (T1 t,
    T2 u 
    )
    +
    + +
    +
    + +

    ◆ max()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr const T& cutlass::platform::max (const T & a,
    const T & b 
    )
    +
    + +
    +
    + +

    ◆ min()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr const T& cutlass::platform::min (const T & a,
    const T & b 
    )
    +
    + +
    +
    + +

    ◆ operator!=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator!= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator<()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator< (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator<=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator<= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator==()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator== (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator>()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator> (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator>=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator>= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ swap()

    + +
    +
    +
    +template<typename T , typename Deleter >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    void cutlass::platform::swap (unique_ptr< T, Deleter > & lhs,
    unique_ptr< T, Deleter > & rhs 
    )
    +
    +noexcept
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/namespacemembers.html b/docs/generated-html/namespacemembers.html new file mode 100644 index 00000000..9566721d --- /dev/null +++ b/docs/generated-html/namespacemembers.html @@ -0,0 +1,214 @@ + + + + + + + +Cutlass: Namespace Members + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    Here is a list of all namespace members with links to the namespace documentation for each member:
    + +

    - _ -

    + + +

    - c -

      +
    • cuda_perror_impl() +: cutlass +
    • +
    + + +

    - f -

    + + +

    - g -

    + + +

    - i -

    + + +

    - l -

    + + +

    - m -

    + + +

    - o -

    + + +

    - r -

    + + +

    - s -

    + + +

    - t -

    +
    + + + + diff --git a/docs/generated-html/namespacemembers_func.html b/docs/generated-html/namespacemembers_func.html new file mode 100644 index 00000000..2ba52b0e --- /dev/null +++ b/docs/generated-html/namespacemembers_func.html @@ -0,0 +1,200 @@ + + + + + + + +Cutlass: Namespace Members + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +  + +

    - _ -

    + + +

    - c -

      +
    • cuda_perror_impl() +: cutlass +
    • +
    + + +

    - g -

    + + +

    - i -

    + + +

    - l -

    + + +

    - m -

    + + +

    - o -

    + + +

    - r -

    + + +

    - s -

    +
    + + + + diff --git a/docs/generated-html/namespacemembers_type.html b/docs/generated-html/namespacemembers_type.html new file mode 100644 index 00000000..37618fba --- /dev/null +++ b/docs/generated-html/namespacemembers_type.html @@ -0,0 +1,87 @@ + + + + + + + +Cutlass: Namespace Members + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    + + + + diff --git a/docs/generated-html/namespacenv__std.html b/docs/generated-html/namespacenv__std.html new file mode 100644 index 00000000..5eb2f549 --- /dev/null +++ b/docs/generated-html/namespacenv__std.html @@ -0,0 +1,934 @@ + + + + + + + +Cutlass: nv_std Namespace Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + +
    +
    + +
    +
    nv_std Namespace Reference
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  aligned_chunk
     
    struct  aligned_storage
     std::aligned_storage More...
     
    struct  alignment_of
     std::alignment_of More...
     
    struct  alignment_of< const value_t >
     
    struct  alignment_of< const volatile value_t >
     
    struct  alignment_of< double2 >
     
    struct  alignment_of< double4 >
     
    struct  alignment_of< float4 >
     
    struct  alignment_of< int4 >
     
    struct  alignment_of< long4 >
     
    struct  alignment_of< longlong2 >
     
    struct  alignment_of< longlong4 >
     
    struct  alignment_of< uint4 >
     
    struct  alignment_of< ulong4 >
     
    struct  alignment_of< ulonglong2 >
     
    struct  alignment_of< ulonglong4 >
     
    struct  alignment_of< volatile value_t >
     
    struct  bool_constant
     std::bool_constant More...
     
    struct  conditional
     std::conditional (true specialization) More...
     
    struct  conditional< false, T, F >
     std::conditional (false specialization) More...
     
    struct  default_delete
     Default deleter. More...
     
    struct  default_delete< T[]>
     Partial specialization for deleting array types. More...
     
    struct  enable_if
     std::enable_if (true specialization) More...
     
    struct  enable_if< false, T >
     std::enable_if (false specialization) More...
     
    struct  greater
     std::greater More...
     
    struct  integral_constant
     std::integral_constant More...
     
    struct  is_arithmetic
     std::is_arithmetic More...
     
    struct  is_base_of
     std::is_base_of More...
     
    struct  is_base_of_helper
     Helper for std::is_base_of. More...
     
    struct  is_floating_point
     std::is_floating_point More...
     
    struct  is_fundamental
     std::is_fundamental More...
     
    struct  is_integral
     std::is_integral More...
     
    struct  is_integral< char >
     
    struct  is_integral< const T >
     
    struct  is_integral< const volatile T >
     
    struct  is_integral< int >
     
    struct  is_integral< long >
     
    struct  is_integral< long long >
     
    struct  is_integral< short >
     
    struct  is_integral< signed char >
     
    struct  is_integral< unsigned char >
     
    struct  is_integral< unsigned int >
     
    struct  is_integral< unsigned long >
     
    struct  is_integral< unsigned long long >
     
    struct  is_integral< unsigned short >
     
    struct  is_integral< volatile T >
     
    struct  is_pointer
     std::is_pointer More...
     
    struct  is_pointer_helper
     Helper for std::is_pointer (false specialization) More...
     
    struct  is_pointer_helper< T * >
     Helper for std::is_pointer (true specialization) More...
     
    struct  is_same
     std::is_same (false specialization) More...
     
    struct  is_same< A, A >
     std::is_same (true specialization) More...
     
    struct  is_trivially_copyable
     
    struct  is_void
     std::is_void More...
     
    struct  is_volatile
     std::is_volatile More...
     
    struct  is_volatile< volatile T >
     
    struct  less
     std::less More...
     
    struct  nullptr_t
     std::nullptr_t More...
     
    struct  plus
     nv_std::plus More...
     
    struct  remove_const
     std::remove_const (non-const specialization) More...
     
    struct  remove_const< const T >
     std::remove_const (const specialization) More...
     
    struct  remove_cv
     std::remove_cv More...
     
    struct  remove_volatile
     std::remove_volatile (non-volatile specialization) More...
     
    struct  remove_volatile< volatile T >
     std::remove_volatile (volatile specialization) More...
     
    class  unique_ptr
     std::unique_ptr More...
     
    + + + + + + + +

    +Typedefs

    typedef integral_constant< bool, true > true_type
     The type used as a compile-time boolean with true value. More...
     
    typedef integral_constant< bool, false > false_type
     The type used as a compile-time boolean with false value. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & min (const T &a, const T &b)
     std::min More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & max (const T &a, const T &b)
     std::max More...
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator== (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator!= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator< (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator<= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator> (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator>= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > make_pair (T1 t, T2 u)
     
    template<>
    struct __align__ (1) aligned_chunk< 1 >
     
    template<>
    struct __align__ (2) aligned_chunk< 2 >
     
    template<>
    struct __align__ (4) aligned_chunk< 4 >
     
    template<>
    struct __align__ (8) aligned_chunk< 8 >
     
    template<>
    struct __align__ (16) aligned_chunk< 16 >
     
    template<>
    struct __align__ (32) aligned_chunk< 32 >
     
    template<>
    struct __align__ (64) aligned_chunk< 64 >
     
    template<>
    struct __align__ (128) aligned_chunk< 128 >
     
    template<>
    struct __align__ (256) aligned_chunk< 256 >
     
    template<>
    struct __align__ (512) aligned_chunk< 512 >
     
    template<>
    struct __align__ (1024) aligned_chunk< 1024 >
     
    template<>
    struct __align__ (2048) aligned_chunk< 2048 >
     
    template<>
    struct __align__ (4096) aligned_chunk< 4096 >
     
    template<typename T , typename Deleter >
    void swap (unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
     Specializes the swap algorithm. More...
     
    +

    Typedef Documentation

    + +

    ◆ false_type

    + +
    +
    + + + + +
    typedef integral_constant<bool, false> nv_std::false_type
    +
    + +
    +
    + +

    ◆ true_type

    + +
    +
    + + + + +
    typedef integral_constant<bool, true> nv_std::true_type
    +
    + +
    +
    +

    Function Documentation

    + +

    ◆ __align__() [1/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [2/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [3/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (64 )
    +
    + +
    +
    + +

    ◆ __align__() [4/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (128 )
    +
    + +
    +
    + +

    ◆ __align__() [5/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (256 )
    +
    + +
    +
    + +

    ◆ __align__() [6/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (512 )
    +
    + +
    +
    + +

    ◆ __align__() [7/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (1024 )
    +
    + +
    +
    + +

    ◆ __align__() [8/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (2048 )
    +
    + +
    +
    + +

    ◆ __align__() [9/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (4096 )
    +
    + +
    +
    + +

    ◆ __align__() [10/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (32 )
    +
    + +
    +
    + +

    ◆ __align__() [11/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [12/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [13/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (16 )
    +
    + +
    +
    + +

    ◆ make_pair()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE std::pair<T1, T2> nv_std::make_pair (T1 t,
    T2 u 
    )
    +
    + +
    +
    + +

    ◆ max()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr const T& nv_std::max (const T & a,
    const T & b 
    )
    +
    + +
    +
    + +

    ◆ min()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr const T& nv_std::min (const T & a,
    const T & b 
    )
    +
    + +
    +
    + +

    ◆ operator!=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator!= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator<()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator< (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator<=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator<= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator==()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator== (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator>()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator> (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator>=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator>= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ swap()

    + +
    +
    +
    +template<typename T , typename Deleter >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    void nv_std::swap (unique_ptr< T, Deleter > & lhs,
    unique_ptr< T, Deleter > & rhs 
    )
    +
    +noexcept
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/namespaces.html b/docs/generated-html/namespaces.html new file mode 100644 index 00000000..b12cce12 --- /dev/null +++ b/docs/generated-html/namespaces.html @@ -0,0 +1,90 @@ + + + + + + + +Cutlass: Namespace List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    +
    Namespace List
    +
    +
    +
    Here is a list of all namespaces with brief descriptions:
    +
    [detail level 1 2]
    + + + +
     Ncutlass
     Ngemm
     Nplatform
    +
    +
    + + + + diff --git a/docs/generated-html/nav_f.png b/docs/generated-html/nav_f.png new file mode 100644 index 00000000..2a434079 Binary files /dev/null and b/docs/generated-html/nav_f.png differ diff --git a/docs/generated-html/nav_g.png b/docs/generated-html/nav_g.png new file mode 100644 index 00000000..2093a237 Binary files /dev/null and b/docs/generated-html/nav_g.png differ diff --git a/docs/generated-html/nav_h.png b/docs/generated-html/nav_h.png new file mode 100644 index 00000000..9f47cbc6 Binary files /dev/null and b/docs/generated-html/nav_h.png differ diff --git a/docs/generated-html/nv__std_8h.html b/docs/generated-html/nv__std_8h.html new file mode 100644 index 00000000..08a96ccb --- /dev/null +++ b/docs/generated-html/nv__std_8h.html @@ -0,0 +1,630 @@ + + + + + + + +Cutlass: nv_std.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std.h File Reference
    +
    +
    + +

    C++ features that may be otherwise unimplemented for CUDA device functions. +More...

    +
    #include <stdint.h>
    +#include <algorithm>
    +#include <cstddef>
    +#include <functional>
    +#include <utility>
    +#include <cutlass/cutlass.h>
    +
    +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  nv_std::plus< T >
     nv_std::plus More...
     
    struct  nv_std::less< T >
     std::less More...
     
    struct  nv_std::greater< T >
     std::greater More...
     
    struct  nv_std::integral_constant< value_t, V >
     std::integral_constant More...
     
    struct  nv_std::integral_constant< value_t, V >
     std::integral_constant More...
     
    struct  nv_std::bool_constant< V >
     std::bool_constant More...
     
    struct  nv_std::nullptr_t
     std::nullptr_t More...
     
    struct  nv_std::enable_if< C, T >
     std::enable_if (true specialization) More...
     
    struct  nv_std::enable_if< false, T >
     std::enable_if (false specialization) More...
     
    struct  nv_std::conditional< B, T, F >
     std::conditional (true specialization) More...
     
    struct  nv_std::conditional< false, T, F >
     std::conditional (false specialization) More...
     
    struct  nv_std::remove_const< T >
     std::remove_const (non-const specialization) More...
     
    struct  nv_std::remove_const< const T >
     std::remove_const (const specialization) More...
     
    struct  nv_std::remove_volatile< T >
     std::remove_volatile (non-volatile specialization) More...
     
    struct  nv_std::remove_volatile< volatile T >
     std::remove_volatile (volatile specialization) More...
     
    struct  nv_std::remove_cv< T >
     std::remove_cv More...
     
    struct  nv_std::is_same< A, B >
     std::is_same (false specialization) More...
     
    struct  nv_std::is_same< A, A >
     std::is_same (true specialization) More...
     
    struct  nv_std::is_base_of_helper< BaseT, DerivedT >
     Helper for std::is_base_of. More...
     
    struct  nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >
     
    struct  nv_std::is_base_of< BaseT, DerivedT >
     std::is_base_of More...
     
    struct  nv_std::is_volatile< T >
     std::is_volatile More...
     
    struct  nv_std::is_volatile< volatile T >
     
    struct  nv_std::is_pointer_helper< T >
     Helper for std::is_pointer (false specialization) More...
     
    struct  nv_std::is_pointer_helper< T * >
     Helper for std::is_pointer (true specialization) More...
     
    struct  nv_std::is_pointer< T >
     std::is_pointer More...
     
    struct  nv_std::is_void< T >
     std::is_void More...
     
    struct  nv_std::is_integral< T >
     std::is_integral More...
     
    struct  nv_std::is_integral< char >
     
    struct  nv_std::is_integral< signed char >
     
    struct  nv_std::is_integral< unsigned char >
     
    struct  nv_std::is_integral< short >
     
    struct  nv_std::is_integral< unsigned short >
     
    struct  nv_std::is_integral< int >
     
    struct  nv_std::is_integral< unsigned int >
     
    struct  nv_std::is_integral< long >
     
    struct  nv_std::is_integral< unsigned long >
     
    struct  nv_std::is_integral< long long >
     
    struct  nv_std::is_integral< unsigned long long >
     
    struct  nv_std::is_integral< volatile T >
     
    struct  nv_std::is_integral< const T >
     
    struct  nv_std::is_integral< const volatile T >
     
    struct  nv_std::is_floating_point< T >
     std::is_floating_point More...
     
    struct  nv_std::is_arithmetic< T >
     std::is_arithmetic More...
     
    struct  nv_std::is_fundamental< T >
     std::is_fundamental More...
     
    struct  nv_std::is_trivially_copyable< T >
     
    struct  nv_std::alignment_of< value_t >
     std::alignment_of More...
     
    struct  nv_std::alignment_of< value_t >::pad
     
    struct  nv_std::alignment_of< int4 >
     
    struct  nv_std::alignment_of< uint4 >
     
    struct  nv_std::alignment_of< float4 >
     
    struct  nv_std::alignment_of< long4 >
     
    struct  nv_std::alignment_of< ulong4 >
     
    struct  nv_std::alignment_of< longlong2 >
     
    struct  nv_std::alignment_of< ulonglong2 >
     
    struct  nv_std::alignment_of< double2 >
     
    struct  nv_std::alignment_of< longlong4 >
     
    struct  nv_std::alignment_of< ulonglong4 >
     
    struct  nv_std::alignment_of< double4 >
     
    struct  nv_std::alignment_of< volatile value_t >
     
    struct  nv_std::alignment_of< const value_t >
     
    struct  nv_std::alignment_of< const volatile value_t >
     
    struct  nv_std::aligned_chunk< Align >
     
    struct  nv_std::aligned_storage< Len, Align >
     std::aligned_storage More...
     
    struct  nv_std::default_delete< T >
     Default deleter. More...
     
    struct  nv_std::default_delete< T[]>
     Partial specialization for deleting array types. More...
     
    class  nv_std::unique_ptr< T, Deleter >
     std::unique_ptr More...
     
    + + + +

    +Namespaces

     nv_std
     
    + + + + + + + + + + + + + + + + + + + + + + +

    +Macros

    #define noexcept
     noexcept, constexpr More...
     
    #define constexpr
     
    #define nullptr   0
     nullptr More...
     
    #define __nv_std_cat_(a, b)   a##b
     static_assert More...
     
    #define __nv_std_cat(a, b)   __nv_std_cat_(a, b)
     
    #define static_assert(__e, __m)   typedef int __nv_std_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
     
    #define __NV_STD_MAX(a, b)   (((b) > (a)) ? (b) : (a))
     Select maximum(a, b) More...
     
    #define __NV_STD_MIN(a, b)   (((b) < (a)) ? (b) : (a))
     Select minimum(a, b) More...
     
    + + + + + + + +

    +Typedefs

    typedef integral_constant< bool, true > nv_std::true_type
     The type used as a compile-time boolean with true value. More...
     
    typedef integral_constant< bool, false > nv_std::false_type
     The type used as a compile-time boolean with false value. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & nv_std::min (const T &a, const T &b)
     std::min More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & nv_std::max (const T &a, const T &b)
     std::max More...
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator== (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator!= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator< (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator<= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator> (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator>= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > nv_std::make_pair (T1 t, T2 u)
     
    template<>
    struct nv_std::__align__ (1) aligned_chunk< 1 >
     
    template<>
    struct nv_std::__align__ (2) aligned_chunk< 2 >
     
    template<>
    struct nv_std::__align__ (4) aligned_chunk< 4 >
     
    template<>
    struct nv_std::__align__ (8) aligned_chunk< 8 >
     
    template<>
    struct nv_std::__align__ (16) aligned_chunk< 16 >
     
    template<>
    struct nv_std::__align__ (32) aligned_chunk< 32 >
     
    template<>
    struct nv_std::__align__ (64) aligned_chunk< 64 >
     
    template<>
    struct nv_std::__align__ (128) aligned_chunk< 128 >
     
    template<>
    struct nv_std::__align__ (256) aligned_chunk< 256 >
     
    template<>
    struct nv_std::__align__ (512) aligned_chunk< 512 >
     
    template<>
    struct nv_std::__align__ (1024) aligned_chunk< 1024 >
     
    template<>
    struct nv_std::__align__ (2048) aligned_chunk< 2048 >
     
    template<>
    struct nv_std::__align__ (4096) aligned_chunk< 4096 >
     
    template<typename T , typename Deleter >
    void nv_std::swap (unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
     Specializes the swap algorithm. More...
     
    +

    Detailed Description

    +

    This file has four components:

    +

    (1) Macros:

      +
    • Empty macro defines for C++ keywords not supported by the current version of C++. These simply allow compilation to proceed (but do not provide the added semantics).
        +
      • noexcept
      • +
      • constexpr
      • +
      • nullptr
      • +
      • static_assert
      • +
      +
    • +
    • Macro functions that we need in constant expressions because the C++ equivalents require constexpr compiler support. These are prefixed with __NV_STD_*
        +
      • __NV_STD_MAX
      • +
      • __NV_STD_MIN
      • +
      +
    • +
    +

    (2) Re-implementations of STL functions and types:

      +
    • C++ features that need the device annotation. These are placed into the nv_std namespace.
        +
      • plus
      • +
      • less
      • +
      • greater
      • +
      • min
      • +
      • max
      • +
      • methods on std::pair (==, !=, <, <=, >, >=, and make_pair())
      • +
      +
    • +
    +

    (3) Stop-gap implementations of unsupported STL functions and types:

      +
    • STL functions and types defined by C++ 11/14/17/etc. that are not provided by the current version of C++. These are placed into the nv_std namespace.
        +
      • integral_constant
      • +
      • nullptr_t
      • +
      • true_type
      • +
      • false_type
      • +
      • bool_constant
      • +
      • enable_if
      • +
      • conditional
      • +
      • is_same
      • +
      • is_base_of
      • +
      • remove_const
      • +
      • remove_volatile
      • +
      • remove_cv
      • +
      • is_volatile
      • +
      • is_pointer
      • +
      • is_void
      • +
      • is_integral
      • +
      • is_floating_point
      • +
      • is_arithmetic
      • +
      • is_fundamental
      • +
      • is_trivially_copyable
      • +
      • alignment_of
      • +
      • aligned_storage
      • +
      +
    • +
    +

    (4) Functions and types that are STL-like (but aren't in the STL):

      +
    • TODO: min and max functors?
    • +
    +

    The idea is that, as we drop support for older compilers, we can simply #define the __NV_STD_XYZ macros and nv_std namespace to alias their C++ counterparts (or trivially find-and-replace their occurrences in code text).

    +

    Macro Definition Documentation

    + +

    ◆ __nv_std_cat

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __nv_std_cat( a,
     b 
    )   __nv_std_cat_(a, b)
    +
    + +
    +
    + +

    ◆ __nv_std_cat_

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __nv_std_cat_( a,
     b 
    )   a##b
    +
    + +
    +
    + +

    ◆ __NV_STD_MAX

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __NV_STD_MAX( a,
     b 
    )   (((b) > (a)) ? (b) : (a))
    +
    + +
    +
    + +

    ◆ __NV_STD_MIN

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __NV_STD_MIN( a,
     b 
    )   (((b) < (a)) ? (b) : (a))
    +
    + +
    +
    + +

    ◆ constexpr

    + +
    +
    + + + + +
    #define constexpr
    +
    + +
    +
    + +

    ◆ noexcept

    + +
    +
    + + + + +
    #define noexcept
    +
    + +
    +
    + +

    ◆ nullptr

    + +
    +
    + + + + +
    #define nullptr   0
    +
    + +
    +
    + +

    ◆ static_assert

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define static_assert( __e,
     __m 
    )   typedef int __nv_std_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/nv__std_8h_source.html b/docs/generated-html/nv__std_8h_source.html new file mode 100644 index 00000000..a28a327c --- /dev/null +++ b/docs/generated-html/nv__std_8h_source.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: nv_std.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    25 
    26 #pragma once
    27 
    94 //-----------------------------------------------------------------------------
    95 // Dependencies
    96 //-----------------------------------------------------------------------------
    97 
    98 #include <stdint.h>
    99 
    100 #if !defined(__CUDACC_RTC__)
    101 //-----------------------------------------------------------------------------
    102 // Include STL files that nv_std provides functionality for
    103 //-----------------------------------------------------------------------------
    104 
    105 #include <algorithm> // Minimum/maximum operations
    106 #include <cstddef> // nullptr_t
    107 #include <functional> // Arithmetic operations
    108 #include <utility> // For methods on std::pair
    109 #if (!defined(_MSC_VER) && (__cplusplus >= 201103L)) || (defined(_MSC_VER) && (_MS_VER >= 1500))
    110 #include <type_traits> // For integral constants, conditional metaprogramming, and type traits
    111 #endif
    112 
    113 #include <cutlass/cutlass.h>
    114 
    115 #endif
    116 /******************************************************************************
    117  * Macros
    118  ******************************************************************************/
    119 //-----------------------------------------------------------------------------
    120 // Keywords
    121 //-----------------------------------------------------------------------------
    122 
    124 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1900))
    125 #ifndef noexcept
    126 #define noexcept
    127 #endif
    128 #ifndef constexpr
    129 #define constexpr
    130 #endif
    131 #endif
    132 
    134 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1310))
    135 #ifndef nullptr
    136 #define nullptr 0
    137 #endif
    138 #endif
    139 
    141 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600))
    142 #ifndef static_assert
    143 #define __nv_std_cat_(a, b) a##b
    144 #define __nv_std_cat(a, b) __nv_std_cat_(a, b)
    145 #define static_assert(__e, __m) typedef int __nv_std_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
    146 #endif
    147 #endif
    148 
    149 //-----------------------------------------------------------------------------
    150 // Functions
    151 //-----------------------------------------------------------------------------
    152 
    154 #ifndef __NV_STD_MAX
    155 #define __NV_STD_MAX(a, b) (((b) > (a)) ? (b) : (a))
    156 #endif
    157 
    159 #ifndef __NV_STD_MIN
    160 #define __NV_STD_MIN(a, b) (((b) < (a)) ? (b) : (a))
    161 #endif
    162 
    163 /******************************************************************************
    164  * Re-implementations
    165  ******************************************************************************/
    166 
    167 namespace nv_std {
    168 
    169 //-----------------------------------------------------------------------------
    170 // Arithmetic operations, comparisons <functional>
    171 //-----------------------------------------------------------------------------
    172 
    174 template <typename T>
    175 struct plus {
    176  CUTLASS_HOST_DEVICE constexpr T operator()(const T& lhs, const T& rhs) const { return lhs + rhs; }
    177 };
    178 
    180 template <typename T>
    181 struct less {
    182  CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const {
    183  return lhs < rhs;
    184  }
    185 };
    186 
    188 template <typename T>
    189 struct greater {
    190  CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const {
    191  return lhs > rhs;
    192  }
    193 };
    194 
    195 //-----------------------------------------------------------------------------
    196 // Minimum/maximum operations <algorithm>
    197 //-----------------------------------------------------------------------------
    198 
    200 template <typename T>
    201 CUTLASS_HOST_DEVICE constexpr const T& min(const T& a, const T& b) {
    202  return (b < a) ? b : a;
    203 }
    204 
    206 template <typename T>
    207 CUTLASS_HOST_DEVICE constexpr const T& max(const T& a, const T& b) {
    208  return (a < b) ? b : a;
    209 }
    210 
    211 #if !defined(__CUDACC_RTC__)
    212 //-----------------------------------------------------------------------------
    213 // Methods on std::pair
    214 //-----------------------------------------------------------------------------
    215 
    216 using std::pair;
    217 
    218 template <class T1, class T2>
    219 CUTLASS_HOST_DEVICE constexpr bool operator==(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    220  return (lhs.first == rhs.first) && (lhs.second == rhs.second);
    221 }
    222 
    223 template <class T1, class T2>
    224 CUTLASS_HOST_DEVICE constexpr bool operator!=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    225  return (lhs.first != rhs.first) && (lhs.second != rhs.second);
    226 }
    227 
    228 template <class T1, class T2>
    229 CUTLASS_HOST_DEVICE constexpr bool operator<(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    230  return (lhs.first < rhs.first) ? true : (rhs.first < lhs.first) ? false
    231  : (lhs.second < rhs.second);
    232 }
    233 
    234 template <class T1, class T2>
    235 CUTLASS_HOST_DEVICE constexpr bool operator<=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    236  return !(rhs < lhs);
    237 }
    238 
    239 template <class T1, class T2>
    240 CUTLASS_HOST_DEVICE constexpr bool operator>(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    241  return (rhs < lhs);
    242 }
    243 
    244 template <class T1, class T2>
    245 CUTLASS_HOST_DEVICE constexpr bool operator>=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    246  return !(lhs < rhs);
    247 }
    248 
    249 template <class T1, class T2>
    250 CUTLASS_HOST_DEVICE std::pair<T1, T2> make_pair(T1 t, T2 u) {
    251  std::pair<T1, T2> retval;
    252  retval.first = t;
    253  retval.second = u;
    254  return retval;
    255 }
    256 #endif
    257 
    258 } // namespace nv_std
    259 
    260 /******************************************************************************
    261  * Implementations of C++ 11/14/17/... STL features
    262  ******************************************************************************/
    263 
    264 namespace nv_std {
    265 
    266 //-----------------------------------------------------------------------------
    267 // Integral constant helper types <type_traits>
    268 //-----------------------------------------------------------------------------
    269 
    270 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    271 
    273 template <typename value_t, value_t V>
    275 
    277 template <typename value_t, value_t V>
    278 struct integral_constant {
    279  static const value_t value = V;
    280 
    281  typedef value_t value_type;
    283 
    284  CUTLASS_HOST_DEVICE operator value_type() const { return value; }
    285 
    286  CUTLASS_HOST_DEVICE const value_type operator()() const { return value; }
    287 };
    288 
    289 #else
    290 
    291 using std::integral_constant;
    292 using std::pair;
    293 
    294 #endif
    295 
    298 
    301 
    302 #if (!defined(_MSC_VER) && (__cplusplus < 201402L)) || (defined(_MSC_VER) && (_MSC_VER < 1900))
    303 
    305 template <bool V>
    307 
    308 #else
    309 
    310 using std::bool_constant;
    311 
    312 #endif
    313 
    314 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1700))
    315 
    317 struct nullptr_t {};
    318 
    319 #else
    320 
    321 using std::nullptr_t;
    322 
    323 #endif
    324 
    325 //-----------------------------------------------------------------------------
    326 // Conditional metaprogramming <type_traits>
    327 //-----------------------------------------------------------------------------
    328 
    329 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600))
    330 
    332 template <bool C, typename T = void>
    333 struct enable_if {
    334  typedef T type;
    335 };
    336 
    338 template <typename T>
    339 struct enable_if<false, T> {};
    340 
    342 template <bool B, class T, class F>
    343 struct conditional {
    344  typedef T type;
    345 };
    346 
    348 template <class T, class F>
    349 struct conditional<false, T, F> {
    350  typedef F type;
    351 };
    352 
    353 #else
    354 
    355 using std::enable_if;
    356 using std::conditional;
    357 
    358 #endif
    359 
    360 //-----------------------------------------------------------------------------
    361 // Const/volatility specifiers <type_traits>
    362 //-----------------------------------------------------------------------------
    363 
    364 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    365 
    367 template <typename T>
    368 struct remove_const {
    369  typedef T type;
    370 };
    371 
    373 template <typename T>
    374 struct remove_const<const T> {
    375  typedef T type;
    376 };
    377 
    379 template <typename T>
    381  typedef T type;
    382 };
    383 
    385 template <typename T>
    386 struct remove_volatile<volatile T> {
    387  typedef T type;
    388 };
    389 
    391 template <typename T>
    392 struct remove_cv {
    394 };
    395 
    396 #else
    397 
    398 using std::remove_const;
    399 using std::remove_volatile;
    400 using std::remove_cv;
    401 
    402 #endif
    403 
    404 //-----------------------------------------------------------------------------
    405 // Type relationships <type_traits>
    406 //-----------------------------------------------------------------------------
    407 
    408 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    409 
    411 template <typename A, typename B>
    412 struct is_same : false_type {};
    413 
    415 template <typename A>
    416 struct is_same<A, A> : true_type {};
    417 
    419 template <typename BaseT, typename DerivedT>
    421  typedef char (&yes)[1];
    422  typedef char (&no)[2];
    423 
    424  template <typename B, typename D>
    425  struct dummy {
    426  CUTLASS_HOST_DEVICE operator B*() const;
    427  CUTLASS_HOST_DEVICE operator D*();
    428  };
    429 
    430  template <typename T>
    431  CUTLASS_HOST_DEVICE static yes check(DerivedT*, T);
    432 
    433  CUTLASS_HOST_DEVICE static no check(BaseT*, int);
    434 
    435  static const bool value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes);
    436 };
    437 
    439 template <typename BaseT, typename DerivedT>
    441  : integral_constant<bool, (is_base_of_helper<typename remove_cv<BaseT>::type,
    442  typename remove_cv<DerivedT>::type>::value) ||
    443  (is_same<typename remove_cv<BaseT>::type,
    444  typename remove_cv<DerivedT>::type>::value)> {};
    445 
    446 #else
    447 
    448 using std::is_same;
    449 using std::is_base_of;
    450 
    451 #endif
    452 
    453 //-----------------------------------------------------------------------------
    454 // Type properties <type_traits>
    455 //-----------------------------------------------------------------------------
    456 
    457 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    458 
    460 template <typename T>
    462 template <typename T>
    463 struct is_volatile<volatile T> : true_type {};
    464 
    466 template <typename T>
    468 
    470 template <typename T>
    471 struct is_pointer_helper<T*> : true_type {};
    472 
    474 template <typename T>
    475 struct is_pointer : is_pointer_helper<typename remove_cv<T>::type> {};
    476 
    478 template <typename T>
    479 struct is_void : is_same<void, typename remove_cv<T>::type> {};
    480 
    482 template <typename T>
    484 template <>
    485 struct is_integral<char> : true_type {};
    486 template <>
    487 struct is_integral<signed char> : true_type {};
    488 template <>
    489 struct is_integral<unsigned char> : true_type {};
    490 template <>
    491 struct is_integral<short> : true_type {};
    492 template <>
    493 struct is_integral<unsigned short> : true_type {};
    494 template <>
    495 struct is_integral<int> : true_type {};
    496 template <>
    497 struct is_integral<unsigned int> : true_type {};
    498 template <>
    499 struct is_integral<long> : true_type {};
    500 template <>
    501 struct is_integral<unsigned long> : true_type {};
    502 template <>
    503 struct is_integral<long long> : true_type {};
    504 template <>
    505 struct is_integral<unsigned long long> : true_type {};
    506 template <typename T>
    507 struct is_integral<volatile T> : is_integral<T> {};
    508 template <typename T>
    509 struct is_integral<const T> : is_integral<T> {};
    510 template <typename T>
    511 struct is_integral<const volatile T> : is_integral<T> {};
    512 
    514 template <typename T>
    516  : integral_constant<bool, (is_same<float, typename remove_cv<T>::type>::value ||
    517  is_same<double, typename remove_cv<T>::type>::value)> {};
    518 
    520 template <typename T>
    522  : integral_constant<bool, (is_integral<T>::value || is_floating_point<T>::value)> {};
    523 
    525 template <typename T>
    527  : integral_constant<bool, (is_arithmetic<T>::value || is_void<T>::value ||
    528  is_same<nullptr_t, typename remove_cv<T>::type>::value)> {};
    529 
    530 #else
    531 
    532 using std::is_volatile;
    533 using std::is_pointer;
    534 using std::is_void;
    535 using std::is_integral;
    536 using std::is_floating_point;
    537 using std::is_arithmetic;
    538 using std::is_fundamental;
    539 
    540 #endif
    541 
    542 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
    543  (defined(__GNUG__) && (__GNUC__ < 5))
    544 
    555 template <typename T>
    557  : integral_constant<bool, (is_fundamental<T>::value || is_pointer<T>::value)> {};
    558 
    559 #else
    560 
    561 using std::is_trivially_copyable;
    562 
    563 #endif
    564 
    565 //-----------------------------------------------------------------------------
    566 // Alignment and layout utilities
    567 //-----------------------------------------------------------------------------
    568 
    569 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    570 
    572 template <typename value_t>
    573 struct alignment_of {
    574  struct pad {
    575  value_t val;
    576  char byte;
    577  };
    578 
    579  enum { value = sizeof(pad) - sizeof(value_t) };
    580 };
    581 
    582 #else
    583 
    584 template <typename value_t>
    585 struct alignment_of : std::alignment_of<value_t> {};
    586 
    587 #endif
    588 
    589 /* 16B specializations where 32-bit Win32 host compiler disagrees with device compiler */
    590 template <>
    591 struct alignment_of<int4> {
    592  enum { value = 16 };
    593 };
    594 template <>
    595 struct alignment_of<uint4> {
    596  enum { value = 16 };
    597 };
    598 template <>
    599 struct alignment_of<float4> {
    600  enum { value = 16 };
    601 };
    602 template <>
    603 struct alignment_of<long4> {
    604  enum { value = 16 };
    605 };
    606 template <>
    607 struct alignment_of<ulong4> {
    608  enum { value = 16 };
    609 };
    610 template <>
    611 struct alignment_of<longlong2> {
    612  enum { value = 16 };
    613 };
    614 template <>
    615 struct alignment_of<ulonglong2> {
    616  enum { value = 16 };
    617 };
    618 template <>
    619 struct alignment_of<double2> {
    620  enum { value = 16 };
    621 };
    622 template <>
    623 struct alignment_of<longlong4> {
    624  enum { value = 16 };
    625 };
    626 template <>
    627 struct alignment_of<ulonglong4> {
    628  enum { value = 16 };
    629 };
    630 template <>
    631 struct alignment_of<double4> {
    632  enum { value = 16 };
    633 };
    634 
    635 // Specializations for volatile/const qualified types
    636 template <typename value_t>
    637 struct alignment_of<volatile value_t> : alignment_of<value_t> {};
    638 template <typename value_t>
    639 struct alignment_of<const value_t> : alignment_of<value_t> {};
    640 template <typename value_t>
    641 struct alignment_of<const volatile value_t> : alignment_of<value_t> {};
    642 
    643 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800))
    644 
    645 template <size_t Align>
    647 template <>
    648 struct __align__(1) aligned_chunk<1> {
    649  uint8_t buff;
    650 };
    651 template <>
    652 struct __align__(2) aligned_chunk<2> {
    653  uint16_t buff;
    654 };
    655 template <>
    656 struct __align__(4) aligned_chunk<4> {
    657  uint32_t buff;
    658 };
    659 template <>
    660 struct __align__(8) aligned_chunk<8> {
    661  uint32_t buff[2];
    662 };
    663 template <>
    664 struct __align__(16) aligned_chunk<16> {
    665  uint32_t buff[4];
    666 };
    667 template <>
    668 struct __align__(32) aligned_chunk<32> {
    669  uint32_t buff[8];
    670 };
    671 template <>
    672 struct __align__(64) aligned_chunk<64> {
    673  uint32_t buff[16];
    674 };
    675 template <>
    676 struct __align__(128) aligned_chunk<128> {
    677  uint32_t buff[32];
    678 };
    679 template <>
    680 struct __align__(256) aligned_chunk<256> {
    681  uint32_t buff[64];
    682 };
    683 template <>
    684 struct __align__(512) aligned_chunk<512> {
    685  uint32_t buff[128];
    686 };
    687 template <>
    688 struct __align__(1024) aligned_chunk<1024> {
    689  uint32_t buff[256];
    690 };
    691 template <>
    692 struct __align__(2048) aligned_chunk<2048> {
    693  uint32_t buff[512];
    694 };
    695 template <>
    696 struct __align__(4096) aligned_chunk<4096> {
    697  uint32_t buff[1024];
    698 };
    699 
    701 template <size_t Len, size_t Align>
    704 };
    705 
    706 #else
    707 
    708 using std::aligned_storage;
    709 
    710 #endif
    711 
    712 #if !defined(__CUDACC_RTC__)
    713 template <typename T>
    716  void operator()(T* ptr) const { delete ptr; }
    717 };
    718 
    720 template <typename T>
    721 struct default_delete<T[]> {
    722  void operator()(T* ptr) const { delete[] ptr; }
    723 };
    724 
    726 template <class T, class Deleter = nv_std::default_delete<T> >
    727 class unique_ptr {
    728  public:
    729  typedef T* pointer;
    730  typedef T element_type;
    731  typedef Deleter deleter_type;
    732 
    733  private:
    735  pointer _ptr;
    736 
    738  deleter_type _deleter;
    739 
    740  public:
    741  unique_ptr() : _ptr(nullptr) {}
    742  unique_ptr(pointer p) : _ptr(p) {}
    743 
    745  if (_ptr) {
    746  _deleter(_ptr);
    747  }
    748  }
    750  pointer get() const noexcept { return _ptr; }
    751 
    754  pointer p(_ptr);
    755  _ptr = nullptr;
    756  return p;
    757  }
    758 
    761  pointer old_ptr = _ptr;
    762  _ptr = p;
    763  if (old_ptr != nullptr) {
    764  get_deleter()(old_ptr);
    765  }
    766  }
    767 
    769  void swap(unique_ptr& other) noexcept { std::swap(_ptr, other._ptr); }
    770 
    772  Deleter& get_deleter() noexcept { return _deleter; }
    773 
    775  Deleter const& get_deleter() const noexcept { return _deleter; }
    776 
    778  operator bool() const noexcept { return _ptr != nullptr; }
    779 
    781  T& operator*() const { return *_ptr; }
    782 
    784  pointer operator->() const noexcept { return _ptr; }
    785 
    787  T& operator[](size_t i) const { return _ptr[i]; }
    788 };
    789 
    791 template <typename T, typename Deleter>
    793  lhs.swap(rhs);
    794 }
    795 #endif
    796 
    797 }; // namespace nv_std
    std::unique_ptr
    Definition: nv_std.h:727
    +
    Definition: nv_std.h:574
    +
    void reset(pointer p=pointer()) noexcept
    Replaces the managed object, deleting the old object.
    Definition: nv_std.h:760
    +
    static const bool value
    Definition: nv_std.h:435
    +
    CUTLASS_HOST_DEVICE constexpr bool operator>(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: nv_std.h:240
    +
    std::conditional (true specialization)
    Definition: nv_std.h:343
    +
    T type
    Definition: nv_std.h:344
    +
    value_t value_type
    Definition: nv_std.h:281
    +
    Deleter & get_deleter() noexcept
    Returns the deleter object.
    Definition: nv_std.h:772
    +
    pointer release() noexcept
    Releases ownership of the managed object, if any.
    Definition: nv_std.h:753
    +
    T type
    Definition: nv_std.h:334
    +
    integral_constant< bool, false > false_type
    The type used as a compile-time boolean with false value.
    Definition: nv_std.h:300
    +
    std::is_pointer
    Definition: nv_std.h:475
    +
    pointer operator->() const noexcept
    Returns a pointer to the managed object.
    Definition: nv_std.h:784
    +
    std::alignment_of
    Definition: nv_std.h:573
    +
    Definition: nv_std.h:556
    +
    integral_constant< value_t, V > type
    Definition: nv_std.h:282
    +
    char byte
    Definition: nv_std.h:576
    +
    Definition: nv_std.h:579
    +
    Helper for std::is_pointer (false specialization)
    Definition: nv_std.h:467
    +
    T & operator[](size_t i) const
    Array access to managed object.
    Definition: nv_std.h:787
    +
    char(& no)[2]
    Definition: nv_std.h:422
    +
    std::less
    Definition: nv_std.h:181
    +
    Deleter deleter_type
    Definition: nv_std.h:731
    +
    value_t val
    Definition: nv_std.h:575
    +
    #define constexpr
    Definition: nv_std.h:129
    +
    std::remove_volatile (non-volatile specialization)
    Definition: nv_std.h:380
    +
    static const value_t value
    Definition: nv_std.h:279
    +
    CUTLASS_HOST_DEVICE constexpr bool operator==(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: nv_std.h:219
    +
    std::remove_cv
    Definition: nv_std.h:392
    +
    std::is_base_of
    Definition: nv_std.h:440
    +
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > make_pair(T1 t, T2 u)
    Definition: nv_std.h:250
    +
    std::integral_constant
    Definition: nv_std.h:274
    +
    struct __align__(1) aligned_chunk< 1 >
    Definition: nv_std.h:648
    +
    void operator()(T *ptr) const
    Definition: nv_std.h:716
    +
    void swap(unique_ptr &other) noexcept
    Swaps the managed objects with *this and another unique_ptr.
    Definition: nv_std.h:769
    +
    std::remove_const (non-const specialization)
    Definition: nv_std.h:368
    +
    CUTLASS_HOST_DEVICE constexpr const T & max(const T &a, const T &b)
    std::max
    Definition: nv_std.h:207
    +
    Definition: nv_std.h:167
    +
    char(& yes)[1]
    Definition: nv_std.h:421
    +
    T type
    Definition: nv_std.h:381
    +
    CUTLASS_HOST_DEVICE constexpr bool operator!=(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: nv_std.h:224
    +
    std::is_volatile
    Definition: nv_std.h:461
    +
    T element_type
    Definition: nv_std.h:730
    +
    nv_std::plus
    Definition: nv_std.h:175
    +
    std::is_same (false specialization)
    Definition: nv_std.h:412
    +
    Default deleter.
    Definition: nv_std.h:715
    +
    T * pointer
    Definition: nv_std.h:729
    +
    Deleter const & get_deleter() const noexcept
    Returns the deleter object.
    Definition: nv_std.h:775
    +
    std::is_integral
    Definition: nv_std.h:483
    +
    Helper for std::is_base_of.
    Definition: nv_std.h:420
    +
    std::is_fundamental
    Definition: nv_std.h:526
    +
    void swap(unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
    Specializes the swap algorithm.
    Definition: nv_std.h:792
    +
    Definition: nv_std.h:425
    +
    CUTLASS_HOST_DEVICE const value_type operator()() const
    Definition: nv_std.h:286
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    #define noexcept
    noexcept, constexpr
    Definition: nv_std.h:126
    +
    std::enable_if (true specialization)
    Definition: nv_std.h:333
    +
    Definition: nv_std.h:646
    +
    unique_ptr()
    Definition: nv_std.h:741
    +
    std::greater
    Definition: nv_std.h:189
    +
    std::is_floating_point
    Definition: nv_std.h:515
    +
    #define nullptr
    nullptr
    Definition: nv_std.h:136
    +
    CUTLASS_HOST_DEVICE constexpr const T & min(const T &a, const T &b)
    std::min
    Definition: nv_std.h:201
    +
    CUTLASS_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const
    Definition: nv_std.h:190
    +
    aligned_chunk< Align > type[Len/sizeof(aligned_chunk< Align >)]
    Definition: nv_std.h:703
    +
    std::nullptr_t
    Definition: nv_std.h:317
    +
    CUTLASS_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const
    Definition: nv_std.h:182
    +
    unique_ptr(pointer p)
    Definition: nv_std.h:742
    +
    std::aligned_storage
    Definition: nv_std.h:702
    +
    remove_volatile< typename remove_const< T >::type >::type type
    Definition: nv_std.h:393
    +
    integral_constant< bool, true > true_type
    The type used as a compile-time boolean with true value.
    Definition: nv_std.h:297
    +
    T & operator*() const
    Dereferences the unique_ptr.
    Definition: nv_std.h:781
    +
    std::bool_constant
    Definition: nv_std.h:306
    +
    std::is_void
    Definition: nv_std.h:479
    +
    F type
    Definition: nv_std.h:350
    +
    static CUTLASS_HOST_DEVICE yes check(DerivedT *, T)
    +
    T type
    Definition: nv_std.h:369
    +
    CUTLASS_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const
    Definition: nv_std.h:176
    +
    T type
    Definition: nv_std.h:375
    +
    Basic include for CUTLASS macros.
    +
    T type
    Definition: nv_std.h:387
    +
    void operator()(T *ptr) const
    Definition: nv_std.h:722
    +
    CUTLASS_HOST_DEVICE constexpr bool operator>=(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: nv_std.h:245
    +
    ~unique_ptr()
    Definition: nv_std.h:744
    +
    std::is_arithmetic
    Definition: nv_std.h:521
    +
    + + + + diff --git a/docs/generated-html/open.png b/docs/generated-html/open.png new file mode 100644 index 00000000..6bc64cce Binary files /dev/null and b/docs/generated-html/open.png differ diff --git a/docs/generated-html/platform_8h.html b/docs/generated-html/platform_8h.html new file mode 100644 index 00000000..8513f5b3 --- /dev/null +++ b/docs/generated-html/platform_8h.html @@ -0,0 +1,632 @@ + + + + + + + +Cutlass: platform.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    platform.h File Reference
    +
    +
    + +

    C++ features that may be otherwise unimplemented for CUDA device functions. +More...

    +
    #include <stdint.h>
    +#include <algorithm>
    +#include <cstddef>
    +#include <functional>
    +#include <utility>
    +#include <cutlass/cutlass.h>
    +
    +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::platform::plus< T >
     std::plus More...
     
    struct  cutlass::platform::less< T >
     std::less More...
     
    struct  cutlass::platform::greater< T >
     std::greater More...
     
    struct  cutlass::platform::integral_constant< value_t, V >
     std::integral_constant More...
     
    struct  cutlass::platform::integral_constant< value_t, V >
     std::integral_constant More...
     
    struct  cutlass::platform::bool_constant< V >
     std::bool_constant More...
     
    struct  cutlass::platform::nullptr_t
     std::nullptr_t More...
     
    struct  cutlass::platform::enable_if< C, T >
     std::enable_if (true specialization) More...
     
    struct  cutlass::platform::enable_if< false, T >
     std::enable_if (false specialization) More...
     
    struct  cutlass::platform::conditional< B, T, F >
     std::conditional (true specialization) More...
     
    struct  cutlass::platform::conditional< false, T, F >
     std::conditional (false specialization) More...
     
    struct  cutlass::platform::remove_const< T >
     std::remove_const (non-const specialization) More...
     
    struct  cutlass::platform::remove_const< const T >
     std::remove_const (const specialization) More...
     
    struct  cutlass::platform::remove_volatile< T >
     std::remove_volatile (non-volatile specialization) More...
     
    struct  cutlass::platform::remove_volatile< volatile T >
     std::remove_volatile (volatile specialization) More...
     
    struct  cutlass::platform::remove_cv< T >
     std::remove_cv More...
     
    struct  cutlass::platform::is_same< A, B >
     std::is_same (false specialization) More...
     
    struct  cutlass::platform::is_same< A, A >
     std::is_same (true specialization) More...
     
    struct  cutlass::platform::is_base_of_helper< BaseT, DerivedT >
     Helper for std::is_base_of. More...
     
    struct  cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >
     
    struct  cutlass::platform::is_base_of< BaseT, DerivedT >
     std::is_base_of More...
     
    struct  cutlass::platform::is_volatile< T >
     std::is_volatile More...
     
    struct  cutlass::platform::is_volatile< volatile T >
     
    struct  cutlass::platform::is_pointer_helper< T >
     Helper for std::is_pointer (false specialization) More...
     
    struct  cutlass::platform::is_pointer_helper< T * >
     Helper for std::is_pointer (true specialization) More...
     
    struct  cutlass::platform::is_pointer< T >
     std::is_pointer More...
     
    struct  cutlass::platform::is_void< T >
     std::is_void More...
     
    struct  cutlass::platform::is_integral< T >
     std::is_integral More...
     
    struct  cutlass::platform::is_integral< char >
     
    struct  cutlass::platform::is_integral< signed char >
     
    struct  cutlass::platform::is_integral< unsigned char >
     
    struct  cutlass::platform::is_integral< short >
     
    struct  cutlass::platform::is_integral< unsigned short >
     
    struct  cutlass::platform::is_integral< int >
     
    struct  cutlass::platform::is_integral< unsigned int >
     
    struct  cutlass::platform::is_integral< long >
     
    struct  cutlass::platform::is_integral< unsigned long >
     
    struct  cutlass::platform::is_integral< long long >
     
    struct  cutlass::platform::is_integral< unsigned long long >
     
    struct  cutlass::platform::is_integral< volatile T >
     
    struct  cutlass::platform::is_integral< const T >
     
    struct  cutlass::platform::is_integral< const volatile T >
     
    struct  cutlass::platform::is_floating_point< T >
     std::is_floating_point More...
     
    struct  cutlass::platform::is_arithmetic< T >
     std::is_arithmetic More...
     
    struct  cutlass::platform::is_fundamental< T >
     std::is_fundamental More...
     
    struct  cutlass::platform::is_trivially_copyable< T >
     
    struct  cutlass::platform::alignment_of< value_t >
     std::alignment_of More...
     
    struct  cutlass::platform::alignment_of< value_t >::pad
     
    struct  cutlass::platform::alignment_of< int4 >
     
    struct  cutlass::platform::alignment_of< uint4 >
     
    struct  cutlass::platform::alignment_of< float4 >
     
    struct  cutlass::platform::alignment_of< long4 >
     
    struct  cutlass::platform::alignment_of< ulong4 >
     
    struct  cutlass::platform::alignment_of< longlong2 >
     
    struct  cutlass::platform::alignment_of< ulonglong2 >
     
    struct  cutlass::platform::alignment_of< double2 >
     
    struct  cutlass::platform::alignment_of< longlong4 >
     
    struct  cutlass::platform::alignment_of< ulonglong4 >
     
    struct  cutlass::platform::alignment_of< double4 >
     
    struct  cutlass::platform::alignment_of< volatile value_t >
     
    struct  cutlass::platform::alignment_of< const value_t >
     
    struct  cutlass::platform::alignment_of< const volatile value_t >
     
    struct  cutlass::platform::aligned_chunk< Align >
     
    struct  cutlass::platform::aligned_storage< Len, Align >
     std::aligned_storage More...
     
    struct  cutlass::platform::default_delete< T >
     Default deleter. More...
     
    struct  cutlass::platform::default_delete< T[]>
     Partial specialization for deleting array types. More...
     
    class  cutlass::platform::unique_ptr< T, Deleter >
     std::unique_ptr More...
     
    + + + + + +

    +Namespaces

     cutlass
     
     cutlass::platform
     
    + + + + + + + + + + + + + + + + + + + + + + +

    +Macros

    #define noexcept
     noexcept, constexpr More...
     
    #define constexpr
     
    #define nullptr   0
     nullptr More...
     
    #define __platform_cat_(a, b)   a##b
     static_assert More...
     
    #define __platform_cat(a, b)   __platform_cat_(a, b)
     
    #define static_assert(__e, __m)   typedef int __platform_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
     
    #define __NV_STD_MAX(a, b)   (((b) > (a)) ? (b) : (a))
     Select maximum(a, b) More...
     
    #define __NV_STD_MIN(a, b)   (((b) < (a)) ? (b) : (a))
     Select minimum(a, b) More...
     
    + + + + + + + +

    +Typedefs

    typedef integral_constant< bool, true > cutlass::platform::true_type
     The type used as a compile-time boolean with true value. More...
     
    typedef integral_constant< bool, false > cutlass::platform::false_type
     The type used as a compile-time boolean with false value. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & cutlass::platform::min (const T &a, const T &b)
     std::min More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & cutlass::platform::max (const T &a, const T &b)
     std::max More...
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator== (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator!= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator< (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator<= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator> (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator>= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > cutlass::platform::make_pair (T1 t, T2 u)
     
    template<>
    struct cutlass::platform::__align__ (1) aligned_chunk< 1 >
     
    template<>
    struct cutlass::platform::__align__ (2) aligned_chunk< 2 >
     
    template<>
    struct cutlass::platform::__align__ (4) aligned_chunk< 4 >
     
    template<>
    struct cutlass::platform::__align__ (8) aligned_chunk< 8 >
     
    template<>
    struct cutlass::platform::__align__ (16) aligned_chunk< 16 >
     
    template<>
    struct cutlass::platform::__align__ (32) aligned_chunk< 32 >
     
    template<>
    struct cutlass::platform::__align__ (64) aligned_chunk< 64 >
     
    template<>
    struct cutlass::platform::__align__ (128) aligned_chunk< 128 >
     
    template<>
    struct cutlass::platform::__align__ (256) aligned_chunk< 256 >
     
    template<>
    struct cutlass::platform::__align__ (512) aligned_chunk< 512 >
     
    template<>
    struct cutlass::platform::__align__ (1024) aligned_chunk< 1024 >
     
    template<>
    struct cutlass::platform::__align__ (2048) aligned_chunk< 2048 >
     
    template<>
    struct cutlass::platform::__align__ (4096) aligned_chunk< 4096 >
     
    template<typename T , typename Deleter >
    void cutlass::platform::swap (unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
     Specializes the swap algorithm. More...
     
    +

    Detailed Description

    +

    This file has four components:

    +

    (1) Macros:

      +
    • Empty macro defines for C++ keywords not supported by the current version of C++. These simply allow compilation to proceed (but do not provide the added semantics).
        +
      • noexcept
      • +
      • constexpr
      • +
      • nullptr
      • +
      • static_assert
      • +
      +
    • +
    • Macro functions that we need in constant expressions because the C++ equivalents require constexpr compiler support. These are prefixed with __NV_STD_*.
        +
      • __NV_STD_MAX
      • +
      • __NV_STD_MIN
      • +
      +
    • +
    +

    (2) Re-implementations of STL functions and types:

      +
    • C++ features that need the device annotation. These are placed into the platform namespace.
        +
      • plus
      • +
      • less
      • +
      • greater
      • +
      • min
      • +
      • max
      • +
      • comparison operators for std::pair (==, !=, <, <=, >, >=) and make_pair()
      • +
      +
    • +
    +

    (3) Stop-gap implementations of unsupported STL functions and types:

      +
    • STL functions and types defined by C++ 11/14/17/etc. that are not provided by the current version of C++. These are placed into the platform namespace.
        +
      • integral_constant
      • +
      • nullptr_t
      • +
      • true_type
      • +
      • false_type
      • +
      • bool_constant
      • +
      • enable_if
      • +
      • conditional
      • +
      • is_same
      • +
      • is_base_of
      • +
      • remove_const
      • +
      • remove_volatile
      • +
      • remove_cv
      • +
      • is_volatile
      • +
      • is_pointer
      • +
      • is_void
      • +
      • is_integral
      • +
      • is_floating_point
      • +
      • is_arithmetic
      • +
      • is_fundamental
      • +
      • is_trivially_copyable
      • +
      • alignment_of
      • +
      • aligned_storage
      • +
      +
    • +
    +

    (4) Functions and types that are STL-like (but aren't in the STL):

      +
    • TODO: min and max functors?
    • +
    +

    The idea is that, as we drop support for older compilers, we can simply #define the __NV_STD_XYZ macros and platform namespace to alias their C++ counterparts (or trivially find-and-replace their occurrences in code text).

    +

    Macro Definition Documentation

    + +

    ◆ __NV_STD_MAX

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __NV_STD_MAX( a,
     
    )   (((b) > (a)) ? (b) : (a))
    +
    + +
    +
    + +

    ◆ __NV_STD_MIN

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __NV_STD_MIN( a,
     
    )   (((b) < (a)) ? (b) : (a))
    +
    + +
    +
    + +

    ◆ __platform_cat

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __platform_cat( a,
     
    )   __platform_cat_(a, b)
    +
    + +
    +
    + +

    ◆ __platform_cat_

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __platform_cat_( a,
     
    )   a##b
    +
    + +
    +
    + +

    ◆ constexpr

    + +
    +
    + + + + +
    #define constexpr
    +
    + +
    +
    + +

    ◆ noexcept

    + +
    +
    + + + + +
    #define noexcept
    +
    + +
    +
    + +

    ◆ nullptr

    + +
    +
    + + + + +
    #define nullptr   0
    +
    + +
    +
    + +

    ◆ static_assert

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define static_assert( __e,
     __m 
    )   typedef int __platform_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/platform_8h_source.html b/docs/generated-html/platform_8h_source.html new file mode 100644 index 00000000..9dcbacf2 --- /dev/null +++ b/docs/generated-html/platform_8h_source.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: platform.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    platform.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    25 
    26 #pragma once
    27 
    94 //-----------------------------------------------------------------------------
    95 // Dependencies
    96 //-----------------------------------------------------------------------------
    97 
    98 #include <stdint.h>
    99 
    100 #if !defined(__CUDACC_RTC__)
    101 //-----------------------------------------------------------------------------
    102 // Include STL files that platform provides functionality for
    103 //-----------------------------------------------------------------------------
    104 
    105 #include <algorithm> // Minimum/maximum operations
    106 #include <cstddef> // nullptr_t
    107 #include <functional> // Arithmetic operations
    108 #include <utility> // For methods on std::pair
    109 #if (!defined(_MSC_VER) && (__cplusplus >= 201103L)) || (defined(_MSC_VER) && (_MS_VER >= 1500))
    110 #include <type_traits> // For integral constants, conditional metaprogramming, and type traits
    111 #endif
    112 
    113 #include <cutlass/cutlass.h>
    114 
    115 #endif
    116 /******************************************************************************
    117  * Macros
    118  ******************************************************************************/
    119 //-----------------------------------------------------------------------------
    120 // Keywords
    121 //-----------------------------------------------------------------------------
    122 
    124 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1900))
    125 #ifndef noexcept
    126 #define noexcept
    127 #endif
    128 #ifndef constexpr
    129 #define constexpr
    130 #endif
    131 #endif
    132 
    134 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1310))
    135 #ifndef nullptr
    136 #define nullptr 0
    137 #endif
    138 #endif
    139 
    141 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600))
    142 #ifndef static_assert
    143 #define __platform_cat_(a, b) a##b
    144 #define __platform_cat(a, b) __platform_cat_(a, b)
    145 #define static_assert(__e, __m) typedef int __platform_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
    146 #endif
    147 #endif
    148 
    149 //-----------------------------------------------------------------------------
    150 // Functions
    151 //-----------------------------------------------------------------------------
    152 
    154 #ifndef __NV_STD_MAX
    155 #define __NV_STD_MAX(a, b) (((b) > (a)) ? (b) : (a))
    156 #endif
    157 
    159 #ifndef __NV_STD_MIN
    160 #define __NV_STD_MIN(a, b) (((b) < (a)) ? (b) : (a))
    161 #endif
    162 
    163 /******************************************************************************
    164  * Re-implementations
    165  ******************************************************************************/
    166 namespace cutlass {
    167 namespace platform {
    168 
    169 //-----------------------------------------------------------------------------
    170 // Arithmetic operations, comparisons <functional>
    171 //-----------------------------------------------------------------------------
    172 
    174 template <typename T>
    175 struct plus {
    176  CUTLASS_HOST_DEVICE constexpr T operator()(const T& lhs, const T& rhs) const { return lhs + rhs; }
    177 };
    178 
    180 template <typename T>
    181 struct less {
    182  CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const {
    183  return lhs < rhs;
    184  }
    185 };
    186 
    188 template <typename T>
    189 struct greater {
    190  CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const {
    191  return lhs > rhs;
    192  }
    193 };
    194 
    195 //-----------------------------------------------------------------------------
    196 // Minimum/maximum operations <algorithm>
    197 //-----------------------------------------------------------------------------
    198 
    200 template <typename T>
    201 CUTLASS_HOST_DEVICE constexpr const T& min(const T& a, const T& b) {
    202  return (b < a) ? b : a;
    203 }
    204 
    206 template <typename T>
    207 CUTLASS_HOST_DEVICE constexpr const T& max(const T& a, const T& b) {
    208  return (a < b) ? b : a;
    209 }
    210 
    211 #if !defined(__CUDACC_RTC__)
    212 //-----------------------------------------------------------------------------
    213 // Methods on std::pair
    214 //-----------------------------------------------------------------------------
    215 
    216 using std::pair;
    217 
    218 template <class T1, class T2>
    219 CUTLASS_HOST_DEVICE constexpr bool operator==(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    220  return (lhs.first == rhs.first) && (lhs.second == rhs.second);
    221 }
    222 
    223 template <class T1, class T2>
    224 CUTLASS_HOST_DEVICE constexpr bool operator!=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    225  return (lhs.first != rhs.first) && (lhs.second != rhs.second);
    226 }
    227 
    228 template <class T1, class T2>
    229 CUTLASS_HOST_DEVICE constexpr bool operator<(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    230  return (lhs.first < rhs.first) ? true : (rhs.first < lhs.first) ? false
    231  : (lhs.second < rhs.second);
    232 }
    233 
    234 template <class T1, class T2>
    235 CUTLASS_HOST_DEVICE constexpr bool operator<=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    236  return !(rhs < lhs);
    237 }
    238 
    239 template <class T1, class T2>
    240 CUTLASS_HOST_DEVICE constexpr bool operator>(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    241  return (rhs < lhs);
    242 }
    243 
    244 template <class T1, class T2>
    245 CUTLASS_HOST_DEVICE constexpr bool operator>=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    246  return !(lhs < rhs);
    247 }
    248 
    249 template <class T1, class T2>
    250 CUTLASS_HOST_DEVICE std::pair<T1, T2> make_pair(T1 t, T2 u) {
    251  std::pair<T1, T2> retval;
    252  retval.first = t;
    253  retval.second = u;
    254  return retval;
    255 }
    256 #endif
    257 
    258 } // namespace platform
    259 
    260 /******************************************************************************
    261  * Implementations of C++ 11/14/17/... STL features
    262  ******************************************************************************/
    263 
    264 namespace platform {
    265 
    266 //-----------------------------------------------------------------------------
    267 // Integral constant helper types <type_traits>
    268 //-----------------------------------------------------------------------------
    269 
    270 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    271 
    273 template <typename value_t, value_t V>
    275 
    277 template <typename value_t, value_t V>
    278 struct integral_constant {
    279  static const value_t value = V;
    280 
    281  typedef value_t value_type;
    283 
    284  CUTLASS_HOST_DEVICE operator value_type() const { return value; }
    285 
    286  CUTLASS_HOST_DEVICE const value_type operator()() const { return value; }
    287 };
    288 
    289 #else
    290 
    291 using std::integral_constant;
    292 using std::pair;
    293 
    294 #endif
    295 
    298 
    301 
    302 #if (!defined(_MSC_VER) && (__cplusplus < 201402L)) || (defined(_MSC_VER) && (_MSC_VER < 1900))
    303 
    305 template <bool V>
    307 
    308 #else
    309 
    310 using std::bool_constant;
    311 
    312 #endif
    313 
    314 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1700))
    315 
    317 struct nullptr_t {};
    318 
    319 #else
    320 
    321 using std::nullptr_t;
    322 
    323 #endif
    324 
    325 //-----------------------------------------------------------------------------
    326 // Conditional metaprogramming <type_traits>
    327 //-----------------------------------------------------------------------------
    328 
    329 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600))
    330 
    332 template <bool C, typename T = void>
    333 struct enable_if {
    334  typedef T type;
    335 };
    336 
    338 template <typename T>
    339 struct enable_if<false, T> {};
    340 
    342 template <bool B, class T, class F>
    343 struct conditional {
    344  typedef T type;
    345 };
    346 
    348 template <class T, class F>
    349 struct conditional<false, T, F> {
    350  typedef F type;
    351 };
    352 
    353 #else
    354 
    355 using std::enable_if;
    356 using std::conditional;
    357 
    358 #endif
    359 
    360 //-----------------------------------------------------------------------------
    361 // Const/volatility specifiers <type_traits>
    362 //-----------------------------------------------------------------------------
    363 
    364 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    365 
    367 template <typename T>
    368 struct remove_const {
    369  typedef T type;
    370 };
    371 
    373 template <typename T>
    374 struct remove_const<const T> {
    375  typedef T type;
    376 };
    377 
    379 template <typename T>
    381  typedef T type;
    382 };
    383 
    385 template <typename T>
    386 struct remove_volatile<volatile T> {
    387  typedef T type;
    388 };
    389 
    391 template <typename T>
    392 struct remove_cv {
    394 };
    395 
    396 #else
    397 
    398 using std::remove_const;
    399 using std::remove_volatile;
    400 using std::remove_cv;
    401 
    402 #endif
    403 
    404 //-----------------------------------------------------------------------------
    405 // Type relationships <type_traits>
    406 //-----------------------------------------------------------------------------
    407 
    408 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    409 
    411 template <typename A, typename B>
    412 struct is_same : false_type {};
    413 
    415 template <typename A>
    416 struct is_same<A, A> : true_type {};
    417 
    419 template <typename BaseT, typename DerivedT>
    421  typedef char (&yes)[1];
    422  typedef char (&no)[2];
    423 
    424  template <typename B, typename D>
    425  struct dummy {
    426  CUTLASS_HOST_DEVICE operator B*() const;
    427  CUTLASS_HOST_DEVICE operator D*();
    428  };
    429 
    430  template <typename T>
    431  CUTLASS_HOST_DEVICE static yes check(DerivedT*, T);
    432 
    433  CUTLASS_HOST_DEVICE static no check(BaseT*, int);
    434 
    435  static const bool value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes);
    436 };
    437 
    439 template <typename BaseT, typename DerivedT>
    441  : integral_constant<bool,
    442  (is_base_of_helper<typename remove_cv<BaseT>::type,
    443  typename remove_cv<DerivedT>::type>::value) ||
    444  (is_same<typename remove_cv<BaseT>::type,
    445  typename remove_cv<DerivedT>::type>::value)> {};
    446 
    447 #else
    448 
    449 using std::is_same;
    450 using std::is_base_of;
    451 
    452 #endif
    453 
    454 //-----------------------------------------------------------------------------
    455 // Type properties <type_traits>
    456 //-----------------------------------------------------------------------------
    457 
    458 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    459 
    461 template <typename T>
    463 template <typename T>
    464 struct is_volatile<volatile T> : true_type {};
    465 
    467 template <typename T>
    469 
    471 template <typename T>
    472 struct is_pointer_helper<T*> : true_type {};
    473 
    475 template <typename T>
    476 struct is_pointer : is_pointer_helper<typename remove_cv<T>::type> {};
    477 
    479 template <typename T>
    480 struct is_void : is_same<void, typename remove_cv<T>::type> {};
    481 
    483 template <typename T>
    485 template <>
    486 struct is_integral<char> : true_type {};
    487 template <>
    488 struct is_integral<signed char> : true_type {};
    489 template <>
    490 struct is_integral<unsigned char> : true_type {};
    491 template <>
    492 struct is_integral<short> : true_type {};
    493 template <>
    494 struct is_integral<unsigned short> : true_type {};
    495 template <>
    496 struct is_integral<int> : true_type {};
    497 template <>
    498 struct is_integral<unsigned int> : true_type {};
    499 template <>
    500 struct is_integral<long> : true_type {};
    501 template <>
    502 struct is_integral<unsigned long> : true_type {};
    503 template <>
    504 struct is_integral<long long> : true_type {};
    505 template <>
    506 struct is_integral<unsigned long long> : true_type {};
    507 template <typename T>
    508 struct is_integral<volatile T> : is_integral<T> {};
    509 template <typename T>
    510 struct is_integral<const T> : is_integral<T> {};
    511 template <typename T>
    512 struct is_integral<const volatile T> : is_integral<T> {};
    513 
    515 template <typename T>
    517  : integral_constant<bool,
    518  (is_same<float, typename remove_cv<T>::type>::value ||
    519  is_same<double, typename remove_cv<T>::type>::value)> {};
    520 
    522 template <typename T>
    524  : integral_constant<bool, (is_integral<T>::value || is_floating_point<T>::value)> {};
    525 
    527 template <typename T>
    529  : integral_constant<bool,
    530  (is_arithmetic<T>::value || is_void<T>::value ||
    531  is_same<nullptr_t, typename remove_cv<T>::type>::value)> {};
    532 
    533 #else
    534 
    535 using std::is_volatile;
    536 using std::is_pointer;
    537 using std::is_void;
    538 using std::is_integral;
    539 using std::is_floating_point;
    540 using std::is_arithmetic;
    541 using std::is_fundamental;
    542 
    543 #endif
    544 
    545 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
    546  (defined(__GNUG__) && (__GNUC__ < 5))
    547 
    558 template <typename T>
    560  : integral_constant<bool, (is_fundamental<T>::value || is_pointer<T>::value)> {};
    561 
    562 #else
    563 
    564 using std::is_trivially_copyable;
    565 
    566 #endif
    567 
    568 //-----------------------------------------------------------------------------
    569 // Alignment and layout utilities
    570 //-----------------------------------------------------------------------------
    571 
    572 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    573 
    575 template <typename value_t>
    576 struct alignment_of {
    577  struct pad {
    578  value_t val;
    579  char byte;
    580  };
    581 
    582  enum { value = sizeof(pad) - sizeof(value_t) };
    583 };
    584 
    585 #else
    586 
    587 template <typename value_t>
    588 struct alignment_of : std::alignment_of<value_t> {};
    589 
    590 #endif
    591 
    592 /* 16B specializations where 32-bit Win32 host compiler disagrees with device compiler */
    593 template <>
    594 struct alignment_of<int4> {
    595  enum { value = 16 };
    596 };
    597 template <>
    598 struct alignment_of<uint4> {
    599  enum { value = 16 };
    600 };
    601 template <>
    602 struct alignment_of<float4> {
    603  enum { value = 16 };
    604 };
    605 template <>
    606 struct alignment_of<long4> {
    607  enum { value = 16 };
    608 };
    609 template <>
    610 struct alignment_of<ulong4> {
    611  enum { value = 16 };
    612 };
    613 template <>
    614 struct alignment_of<longlong2> {
    615  enum { value = 16 };
    616 };
    617 template <>
    618 struct alignment_of<ulonglong2> {
    619  enum { value = 16 };
    620 };
    621 template <>
    622 struct alignment_of<double2> {
    623  enum { value = 16 };
    624 };
    625 template <>
    626 struct alignment_of<longlong4> {
    627  enum { value = 16 };
    628 };
    629 template <>
    630 struct alignment_of<ulonglong4> {
    631  enum { value = 16 };
    632 };
    633 template <>
    634 struct alignment_of<double4> {
    635  enum { value = 16 };
    636 };
    637 
    638 // Specializations for volatile/const qualified types
    639 template <typename value_t>
    640 struct alignment_of<volatile value_t> : alignment_of<value_t> {};
    641 template <typename value_t>
    642 struct alignment_of<const value_t> : alignment_of<value_t> {};
    643 template <typename value_t>
    644 struct alignment_of<const volatile value_t> : alignment_of<value_t> {};
    645 
    646 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800))
    647 
    648 template <size_t Align>
    650 template <>
    651 struct __align__(1) aligned_chunk<1> {
    652  uint8_t buff;
    653 };
    654 template <>
    655 struct __align__(2) aligned_chunk<2> {
    656  uint16_t buff;
    657 };
    658 template <>
    659 struct __align__(4) aligned_chunk<4> {
    660  uint32_t buff;
    661 };
    662 template <>
    663 struct __align__(8) aligned_chunk<8> {
    664  uint32_t buff[2];
    665 };
    666 template <>
    667 struct __align__(16) aligned_chunk<16> {
    668  uint32_t buff[4];
    669 };
    670 template <>
    671 struct __align__(32) aligned_chunk<32> {
    672  uint32_t buff[8];
    673 };
    674 template <>
    675 struct __align__(64) aligned_chunk<64> {
    676  uint32_t buff[16];
    677 };
    678 template <>
    679 struct __align__(128) aligned_chunk<128> {
    680  uint32_t buff[32];
    681 };
    682 template <>
    683 struct __align__(256) aligned_chunk<256> {
    684  uint32_t buff[64];
    685 };
    686 template <>
    687 struct __align__(512) aligned_chunk<512> {
    688  uint32_t buff[128];
    689 };
    690 template <>
    691 struct __align__(1024) aligned_chunk<1024> {
    692  uint32_t buff[256];
    693 };
    694 template <>
    695 struct __align__(2048) aligned_chunk<2048> {
    696  uint32_t buff[512];
    697 };
    698 template <>
    699 struct __align__(4096) aligned_chunk<4096> {
    700  uint32_t buff[1024];
    701 };
    702 
    704 template <size_t Len, size_t Align>
    707 };
    708 
    709 #else
    710 
    711 using std::aligned_storage;
    712 
    713 #endif
    714 
    715 #if !defined(__CUDACC_RTC__)
    716 template <typename T>
    719  void operator()(T* ptr) const { delete ptr; }
    720 };
    721 
    723 template <typename T>
    724 struct default_delete<T[]> {
    725  void operator()(T* ptr) const { delete[] ptr; }
    726 };
    727 
    729 template <class T, class Deleter = default_delete<T> >
    730 class unique_ptr {
    731  public:
    732  typedef T* pointer;
    733  typedef T element_type;
    734  typedef Deleter deleter_type;
    735 
    736  private:
    738  pointer _ptr;
    739 
    741  deleter_type _deleter;
    742 
    743  public:
    744  unique_ptr() : _ptr(nullptr) {}
    745  unique_ptr(pointer p) : _ptr(p) {}
    746 
    748  if (_ptr) {
    749  _deleter(_ptr);
    750  }
    751  }
    753  pointer get() const noexcept { return _ptr; }
    754 
    757  pointer p(_ptr);
    758  _ptr = nullptr;
    759  return p;
    760  }
    761 
    764  pointer old_ptr = _ptr;
    765  _ptr = p;
    766  if (old_ptr != nullptr) {
    767  get_deleter()(old_ptr);
    768  }
    769  }
    770 
    772  void swap(unique_ptr& other) noexcept { std::swap(_ptr, other._ptr); }
    773 
    775  Deleter& get_deleter() noexcept { return _deleter; }
    776 
    778  Deleter const& get_deleter() const noexcept { return _deleter; }
    779 
    781  operator bool() const noexcept { return _ptr != nullptr; }
    782 
    784  T& operator*() const { return *_ptr; }
    785 
    787  pointer operator->() const noexcept { return _ptr; }
    788 
    790  T& operator[](size_t i) const { return _ptr[i]; }
    791 };
    792 
    794 template <typename T, typename Deleter>
    796  lhs.swap(rhs);
    797 }
    798 #endif
    799 
    800 }; // namespace platform
    801 }; // namespace cutlass
    static const value_t value
    Definition: platform.h:279
    +
    CUTLASS_HOST_DEVICE constexpr const T & max(const T &a, const T &b)
    std::max
    Definition: platform.h:207
    +
    Definition: convert.h:33
    +
    #define constexpr
    Definition: platform.h:129
    +
    std::nullptr_t
    Definition: platform.h:317
    +
    void swap(unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
    Specializes the swap algorithm.
    Definition: platform.h:795
    +
    Helper for std::is_pointer (false specialization)
    Definition: platform.h:468
    +
    Deleter deleter_type
    Definition: platform.h:734
    +
    T type
    Definition: platform.h:369
    +
    value_t val
    Definition: platform.h:578
    +
    T type
    Definition: platform.h:344
    +
    T * pointer
    Definition: platform.h:732
    +
    std::less
    Definition: platform.h:181
    +
    std::is_same (false specialization)
    Definition: platform.h:412
    +
    std::is_pointer
    Definition: platform.h:476
    +
    value_t value_type
    Definition: platform.h:281
    +
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > make_pair(T1 t, T2 u)
    Definition: platform.h:250
    +
    unique_ptr()
    Definition: platform.h:744
    +
    std::greater
    Definition: platform.h:189
    +
    CUTLASS_HOST_DEVICE constexpr bool operator==(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: platform.h:219
    +
    std::is_void
    Definition: platform.h:480
    +
    CUTLASS_HOST_DEVICE constexpr bool operator>=(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: platform.h:245
    +
    pointer operator->() const noexcept
    Returns a pointer to the managed object.
    Definition: platform.h:787
    +
    T & operator[](size_t i) const
    Array access to managed object.
    Definition: platform.h:790
    +
    CUTLASS_HOST_DEVICE constexpr bool operator>(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: platform.h:240
    +
    void operator()(T *ptr) const
    Definition: platform.h:725
    +
    Default deleter.
    Definition: platform.h:718
    +
    Definition: platform.h:582
    +
    CUTLASS_HOST_DEVICE constexpr bool operator!=(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: platform.h:224
    +
    std::unique_ptr
    Definition: platform.h:730
    +
    Definition: platform.h:577
    +
    std::is_floating_point
    Definition: platform.h:516
    + +
    integral_constant< bool, false > false_type
    The type used as a compile-time boolean with false value.
    Definition: platform.h:300
    +
    Deleter const & get_deleter() const noexcept
    Returns the deleter object.
    Definition: platform.h:778
    +
    std::remove_cv
    Definition: platform.h:392
    +
    CUTLASS_HOST_DEVICE const value_type operator()() const
    Definition: platform.h:286
    +
    ~unique_ptr()
    Definition: platform.h:747
    +
    CUTLASS_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const
    Definition: platform.h:190
    +
    struct __align__(1) aligned_chunk< 1 >
    Definition: platform.h:651
    +
    T type
    Definition: platform.h:375
    +
    T type
    Definition: platform.h:381
    +
    std::is_integral
    Definition: platform.h:484
    + + +
    integral_constant< value_t, V > type
    Definition: platform.h:282
    +
    std::is_arithmetic
    Definition: platform.h:523
    +
    char byte
    Definition: platform.h:579
    +
    std::integral_constant
    Definition: platform.h:274
    +
    std::is_base_of
    Definition: platform.h:440
    +
    T type
    Definition: platform.h:334
    +
    #define nullptr
    nullptr
    Definition: platform.h:136
    +
    std::is_volatile
    Definition: platform.h:462
    +
    std::is_fundamental
    Definition: platform.h:528
    +
    platform::plus
    Definition: platform.h:175
    +
    std::enable_if (true specialization)
    Definition: platform.h:333
    +
    integral_constant< bool, true > true_type
    The type used as a compile-time boolean with true value.
    Definition: platform.h:297
    +
    void operator()(T *ptr) const
    Definition: platform.h:719
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    T element_type
    Definition: platform.h:733
    +
    Deleter & get_deleter() noexcept
    Returns the deleter object.
    Definition: platform.h:775
    +
    std::alignment_of
    Definition: platform.h:576
    +
    CUTLASS_HOST_DEVICE constexpr const T & min(const T &a, const T &b)
    std::min
    Definition: platform.h:201
    +
    remove_volatile< typename remove_const< T >::type >::type type
    Definition: platform.h:393
    +
    std::conditional (true specialization)
    Definition: platform.h:343
    +
    #define noexcept
    noexcept, constexpr
    Definition: platform.h:126
    +
    void reset(pointer p=pointer()) noexcept
    Replaces the managed object, deleting the old object.
    Definition: platform.h:763
    +
    T & operator*() const
    Dereferences the unique_ptr.
    Definition: platform.h:784
    +
    Helper for std::is_base_of.
    Definition: platform.h:420
    +
    std::remove_const (non-const specialization)
    Definition: platform.h:368
    +
    CUTLASS_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const
    Definition: platform.h:176
    +
    CUTLASS_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const
    Definition: platform.h:182
    +
    Definition: platform.h:649
    +
    static CUTLASS_HOST_DEVICE yes check(DerivedT *, T)
    + +
    void swap(unique_ptr &other) noexcept
    Swaps the managed objects with *this and another unique_ptr.
    Definition: platform.h:772
    +
    static const bool value
    Definition: platform.h:435
    +
    aligned_chunk< Align > type[Len/sizeof(aligned_chunk< Align >)]
    Definition: platform.h:706
    +
    std::aligned_storage
    Definition: platform.h:705
    +
    std::remove_volatile (non-volatile specialization)
    Definition: platform.h:380
    +
    unique_ptr(pointer p)
    Definition: platform.h:745
    +
    char(& yes)[1]
    Definition: platform.h:421
    +
    pointer release() noexcept
    Releases ownership of the managed object, if any.
    Definition: platform.h:756
    +
    Basic include for CUTLASS macros.
    +
    std::bool_constant
    Definition: platform.h:306
    +
    char(& no)[2]
    Definition: platform.h:422
    +
    + + + + diff --git a/docs/generated-html/predicate__vector_8h.html b/docs/generated-html/predicate__vector_8h.html new file mode 100644 index 00000000..42e3f56f --- /dev/null +++ b/docs/generated-html/predicate__vector_8h.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: predicate_vector.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    predicate_vector.h File Reference
    +
    +
    + +

    Defines container classes and iterators for managing a statically sized vector of boolean predicates. +More...

    +
    #include <stdint.h>
    +#include <cutlass/cutlass.h>
    +#include <cutlass/shape.h>
    +#include <cutlass/util/platform.h>
    +
    +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >
     Statically sized array of bits implementing. More...
     
    class  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator
     A const iterator implementing Predicate Iterator Concept enabling sequential read-only access to predicates. More...
     
    class  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator
     An iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates. More...
     
    struct  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator
     Iterator that always returns true. More...
     
    struct  cutlass::TrivialPredicateTileAdapter
     Always returns true predicate. More...
     
    struct  cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >
     Adapter to enable random access to predicates via logical coordinate within a tile. More...
     
    struct  cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >
     Adapter to enable random access to predicates via logical coordinate within a tile. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/predicate__vector_8h_source.html b/docs/generated-html/predicate__vector_8h_source.html new file mode 100644 index 00000000..fed29ff1 --- /dev/null +++ b/docs/generated-html/predicate__vector_8h_source.html @@ -0,0 +1,155 @@ + + + + + + + +Cutlass: predicate_vector.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    predicate_vector.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <stdint.h>
    32 
    33 #include <cutlass/cutlass.h>
    34 #include <cutlass/shape.h>
    35 
    36 #include <cutlass/util/platform.h>
    37 
    38 namespace cutlass {
    39 
    41 
    58 
    78 
    94 
    97 template <
    99  int kPredicates_,
    101  int kPredicatesPerByte_ = 4,
    103  int kPredicateStart_ = 0>
    106  static int const kPredicates = kPredicates_;
    107 
    109  static int const kPredicatesPerByte = kPredicatesPerByte_;
    110 
    112  static int const kPredicateStart = kPredicateStart_;
    113 
    114  // Make sure no one tries to put more than 8 bits in a byte :)
    115  static_assert(kPredicatesPerByte <= 8, "kPredicatesPerByte must fit within an actual byte");
    116  // Make sure the "offsetted" bits fit in one byte.
    118  "The offsetted predicates must fit within an actual byte.");
    119 
    121  typedef uint32_t Storage;
    122 
    125 
    127  static int const kWordCount = (kBytes + sizeof(Storage) - 1) / sizeof(Storage);
    128 
    129  private:
    130  //
    131  // Data members
    132  //
    133 
    135  Storage storageData[kWordCount];
    136 
    137  //
    138  // Methods
    139  //
    140 
    142  CUTLASS_HOST_DEVICE void computeStorageOffset(int &word, int &bit, int idx) const {
    144 
    145  int byte = (idx / kPredicatesPerByte);
    146  int bit_offset = (idx % kPredicatesPerByte);
    147 
    148  word = byte / sizeof(Storage);
    149  int byte_offset = (byte % sizeof(Storage));
    150 
    151  bit = byte_offset * 8 + bit_offset + kPredicateStart;
    152  }
    153 
    155  CUTLASS_HOST_DEVICE Storage &storage(int word) {
    156  CUTLASS_ASSERT(word < kWordCount);
    157  return storageData[word];
    158  }
    159 
    161  CUTLASS_HOST_DEVICE Storage const &storage(int word) const {
    162  CUTLASS_ASSERT(word < kWordCount);
    163  return storageData[word];
    164  }
    165 
    166  public:
    167  //
    168  // Iterator
    169  //
    170 
    178  PredicateVector const &vec_;
    179 
    181  int bit_;
    182 
    183  public:
    186  ConstIterator(ConstIterator const &it) : vec_(it.vec_), bit_(it.bit_) {}
    187 
    190  ConstIterator(PredicateVector const &_vec, int _start = 0) : vec_(_vec), bit_(_start) {}
    191 
    195  ++bit_;
    196  return *this;
    197  }
    198 
    202  --bit_;
    203  return *this;
    204  }
    205 
    209  ConstIterator ret(*this);
    210  ret.bit_++;
    211  return ret;
    212  }
    213 
    217  ConstIterator ret(*this);
    218  ret.bit_--;
    219  return ret;
    220  }
    221 
    224  bool operator==(ConstIterator const &it) const { return bit_ == it.bit_; }
    225 
    228  bool operator!=(ConstIterator const &it) const { return bit_ != it.bit_; }
    229 
    232  bool operator*() const { return vec_[bit_]; }
    233  };
    234 
    240  class Iterator {
    242  PredicateVector &vec_;
    243 
    245  int bit_;
    246 
    247  public:
    250  Iterator(Iterator const &it) : vec_(it.vec_), bit_(it.bit_) {}
    251 
    254  Iterator(PredicateVector &_vec, int _start = 0) : vec_(_vec), bit_(_start) {}
    255 
    259  ++bit_;
    260  return *this;
    261  }
    262 
    266  --bit_;
    267  return *this;
    268  }
    269 
    273  Iterator ret(*this);
    274  ret.bit_++;
    275  return ret;
    276  }
    277 
    281  Iterator ret(*this);
    282  ret.bit_--;
    283  return ret;
    284  }
    285 
    288  bool operator==(Iterator const &it) const { return bit_ == it.bit_; }
    289 
    292  bool operator!=(Iterator const &it) const { return bit_ != it.bit_; }
    293 
    296  bool get() { return vec_[bit_]; }
    297 
    300  bool operator*() const { return vec_[bit_]; }
    301 
    304  void set(bool value = true) { vec_.set(bit_, value); }
    305  };
    306 
    312 
    315  TrivialIterator(Iterator const &it) {}
    316 
    320 
    323  TrivialIterator &operator++() { return *this; }
    324 
    327  TrivialIterator operator++(int) { return *this; }
    328 
    331  bool operator*() const { return true; }
    332  };
    333 
    334  public:
    335  //
    336  // Methods
    337  //
    338 
    340  CUTLASS_HOST_DEVICE PredicateVector(bool value = true) { fill(value); }
    341 
    343  CUTLASS_HOST_DEVICE void fill(bool value = true) {
    344  Storage item = (value ? ~Storage(0) : Storage(0));
    345 
    347  for (int i = 0; i < kWordCount; ++i) {
    348  storage(i) = item;
    349  }
    350  }
    351 
    353  CUTLASS_HOST_DEVICE bool operator[](int idx) const { return at(idx); }
    354 
    356  CUTLASS_HOST_DEVICE bool at(int idx) const {
    357  int bit, word;
    358  computeStorageOffset(word, bit, idx);
    359 
    360  return ((storage(word) >> bit) & 1);
    361  }
    362 
    364  CUTLASS_HOST_DEVICE void set(int idx, bool value = true) {
    365  int bit, word;
    366  computeStorageOffset(word, bit, idx);
    367 
    368  Storage disable_mask = (~(Storage(1) << bit));
    369  Storage enable_mask = (Storage(value) << bit);
    370 
    371  storage(word) = ((storage(word) & disable_mask) | enable_mask);
    372  }
    373 
    377  for (int i = 0; i < kWordCount; ++i) {
    378  storage(i) = (storage(i) & predicates.storage(i));
    379  }
    380  return *this;
    381  }
    382 
    386  for (int i = 0; i < kWordCount; ++i) {
    387  storage(i) = (storage(i) | predicates.storage(i));
    388  }
    389  return *this;
    390  }
    391 
    394  Storage mask(0);
    395  for (int byte = 0; byte < sizeof(Storage); ++byte) {
    396  Storage byte_mask = (((1 << kPredicatesPerByte) - 1) << kPredicateStart);
    397  mask |= (byte_mask << (byte * 8));
    398  }
    399  uint32_t result = 0;
    400  for (int word = 0; word < kWordCount; ++word) {
    401  result |= storage(word);
    402  }
    403  return result == 0;
    404  }
    405 
    407  CUTLASS_DEVICE
    408  Iterator begin() { return Iterator(*this); }
    409 
    411  CUTLASS_DEVICE
    412  Iterator end() { return Iterator(*this, kPredicates); }
    413 
    415  CUTLASS_DEVICE
    416  ConstIterator const_begin() const { return ConstIterator(*this); }
    417 
    419  CUTLASS_DEVICE
    420  ConstIterator const_end() const { return ConstIterator(*this, kPredicates); }
    421 };
    422 
    424 
    429 
    431  CUTLASS_HOST_DEVICE bool at(int, int, int, int) const { return true; }
    432 };
    433 
    435 
    437 template <typename PredicateVector_, typename Iterations_>
    440  typedef PredicateVector_ PredicateVector;
    442  typedef Iterations_ Iterations;
    443 
    444  private:
    446  PredicateVector &predicates;
    447 
    448  public:
    450  CUTLASS_DEVICE PredicateTileAdapter(PredicateVector &predicates_) : predicates(predicates_) {}
    451 
    453  CUTLASS_DEVICE bool at(int d, int h, int w, int c) const {
    454  int const bit = ComputeOffsetFromShape<Iterations>::get(d, h, w, c);
    455  return predicates.at(bit);
    456  }
    457 
    459  CUTLASS_DEVICE void set(int d, int h, int w, int c, bool value) {
    460  int const bit = ComputeOffsetFromShape<Iterations>::get(d, h, w, c);
    461  predicates.set(bit, value);
    462  }
    463 };
    464 
    466 
    468 template <typename PredicateVector_, typename Iterations_>
    471  typedef PredicateVector_ PredicateVector;
    473  typedef Iterations_ Iterations;
    474 
    475  private:
    477  PredicateVector const &predicates;
    478 
    479  public:
    481  CUTLASS_DEVICE ConstPredicateTileAdapter(PredicateVector const &predicates_)
    482  : predicates(predicates_) {}
    483 
    485  CUTLASS_DEVICE bool at(int d, int h, int w, int c) const {
    486  int const bit = ComputeOffsetFromShape<Iterations>::get(d, h, w, c);
    487  return predicates.at(bit);
    488  }
    489 };
    490 
    492 
    493 } // namespace cutlass
    CUTLASS_HOST_DEVICE Iterator(PredicateVector &_vec, int _start=0)
    Constructs an iterator from a PredicateVector.
    Definition: predicate_vector.h:254
    +
    CUTLASS_HOST_DEVICE bool operator!=(ConstIterator const &it) const
    Returns false if iterators point to the same bit.
    Definition: predicate_vector.h:228
    +
    CUTLASS_HOST_DEVICE PredicateVector & operator|=(PredicateVector const &predicates)
    Computes the union of two identical predicate vectors.
    Definition: predicate_vector.h:384
    +
    CUTLASS_HOST_DEVICE TrivialIterator & operator++()
    Pre-increment.
    Definition: predicate_vector.h:323
    +
    Definition: convert.h:33
    +
    CUTLASS_HOST_DEVICE bool is_zero() const
    Returns true if entire predicate array is zero.
    Definition: predicate_vector.h:393
    +
    uint32_t Storage
    Storage type of individual elements.
    Definition: predicate_vector.h:115
    +
    CUTLASS_HOST_DEVICE TrivialIterator(PredicateVector const &_vec)
    Constructs an iterator from a PredicateVector.
    Definition: predicate_vector.h:319
    +
    CUTLASS_HOST_DEVICE ConstIterator & operator--()
    Pre-decrement.
    Definition: predicate_vector.h:201
    +
    static int const kBytes
    Number of bytes needed.
    Definition: predicate_vector.h:124
    +
    CUTLASS_DEVICE ConstIterator const_begin() const
    Returns a ConstIterator.
    Definition: predicate_vector.h:416
    +
    CUTLASS_HOST_DEVICE ConstIterator(PredicateVector const &_vec, int _start=0)
    Definition: predicate_vector.h:190
    +
    CUTLASS_HOST_DEVICE bool at(int idx) const
    Accesses a bit within the predicate vector.
    Definition: predicate_vector.h:356
    +
    CUTLASS_HOST_DEVICE ConstIterator & operator++()
    Pre-increment.
    Definition: predicate_vector.h:194
    +
    PredicateVector_ PredicateVector
    The vector of predicates.
    Definition: predicate_vector.h:440
    +
    static CUTLASS_DEVICE int get(int d, int h, int w, int c)
    Definition: shape.h:166
    +
    CUTLASS_HOST_DEVICE ConstIterator operator++(int)
    Post-increment.
    Definition: predicate_vector.h:208
    +
    CUTLASS_HOST_DEVICE Iterator operator++(int)
    Post-increment.
    Definition: predicate_vector.h:272
    +
    Adapter to enable random access to predicates via logical coordinate within a tile.
    Definition: predicate_vector.h:438
    +
    CUTLASS_HOST_DEVICE TrivialIterator(Iterator const &it)
    Copy constructor.
    Definition: predicate_vector.h:315
    +
    C++ features that may be otherwise unimplemented for CUDA device functions.
    +
    Iterator that always returns true.
    Definition: predicate_vector.h:308
    +
    CUTLASS_HOST_DEVICE TrivialIterator operator++(int)
    Post-increment.
    Definition: predicate_vector.h:327
    +
    CUTLASS_HOST_DEVICE bool operator==(Iterator const &it) const
    Returns true if iterators point to the same bit.
    Definition: predicate_vector.h:288
    +
    CUTLASS_DEVICE PredicateTileAdapter(PredicateVector &predicates_)
    Ctor.
    Definition: predicate_vector.h:450
    +
    CUTLASS_DEVICE bool at(int d, int h, int w, int c) const
    Get the value at location (d, h, w, c).
    Definition: predicate_vector.h:453
    +
    #define CUTLASS_PRAGMA_UNROLL
    Definition: cutlass.h:60
    +
    CUTLASS_DEVICE bool at(int d, int h, int w, int c) const
    Get the value at location (d, h, w, c).
    Definition: predicate_vector.h:485
    +
    CUTLASS_HOST_DEVICE Iterator & operator--()
    Pre-decrement.
    Definition: predicate_vector.h:265
    +
    PredicateVector_ PredicateVector
    The vector of predicates.
    Definition: predicate_vector.h:471
    +
    CUTLASS_HOST_DEVICE PredicateVector & operator &=(PredicateVector const &predicates)
    Computes the intersection of two identical predicate vectors.
    Definition: predicate_vector.h:375
    +
    CUTLASS_HOST_DEVICE Iterator(Iterator const &it)
    Copy constructor.
    Definition: predicate_vector.h:250
    +
    CUTLASS_HOST_DEVICE bool operator[](int idx) const
    Accesses a bit within the predicate vector.
    Definition: predicate_vector.h:353
    +
    CUTLASS_HOST_DEVICE bool operator*() const
    Dereferences iterator.
    Definition: predicate_vector.h:300
    +
    CUTLASS_HOST_DEVICE bool operator*() const
    Dereferences iterator.
    Definition: predicate_vector.h:331
    +
    CUTLASS_HOST_DEVICE void fill(bool value=true)
    Fills all predicates with a given value.
    Definition: predicate_vector.h:343
    +
    static int const kPredicates
    Number of bits stored by the PredicateVector.
    Definition: predicate_vector.h:106
    +
    CUTLASS_DEVICE Iterator end()
    Returns an iterator.
    Definition: predicate_vector.h:412
    +
    #define CUTLASS_ASSERT(x)
    Definition: cutlass.h:64
    +
    CUTLASS_HOST_DEVICE bool at(int, int, int, int) const
    The value at location (d, h, w, c).
    Definition: predicate_vector.h:431
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    static int const kPredicatesPerByte
    Number of bits stored within each byte of the predicate bit vector.
    Definition: predicate_vector.h:109
    +
    #define static_assert(__e, __m)
    Definition: platform.h:145
    +
    Statically sized array of bits implementing.
    Definition: predicate_vector.h:104
    +
    static int const kWordCount
    Number of storage elements needed.
    Definition: predicate_vector.h:127
    +
    CUTLASS_DEVICE ConstIterator const_end() const
    Returns a ConstIterator.
    Definition: predicate_vector.h:420
    +
    Always returns true predicate.
    Definition: predicate_vector.h:426
    +
    CUTLASS_HOST_DEVICE Iterator & operator++()
    Pre-increment.
    Definition: predicate_vector.h:258
    +
    A const iterator implementing Predicate Iterator Concept enabling sequential read-only access to pred...
    Definition: predicate_vector.h:176
    +
    CUTLASS_HOST_DEVICE void set(int idx, bool value=true)
    Set a bit within the predicate vector.
    Definition: predicate_vector.h:364
    +
    CUTLASS_HOST_DEVICE bool operator==(ConstIterator const &it) const
    Returns true if iterators point to the same bit.
    Definition: predicate_vector.h:224
    +
    Iterations_ Iterations
    The iterations.
    Definition: predicate_vector.h:473
    +
    Iterations_ Iterations
    The iterations.
    Definition: predicate_vector.h:442
    +
    CUTLASS_HOST_DEVICE bool operator*() const
    Dereferences iterator.
    Definition: predicate_vector.h:232
    +
    CUTLASS_HOST_DEVICE bool operator!=(Iterator const &it) const
    Returns false if iterators point to the same bit.
    Definition: predicate_vector.h:292
    +
    static int const kPredicateStart
    First bit within each byte containing predicates.
    Definition: predicate_vector.h:112
    +
    CUTLASS_HOST_DEVICE ConstIterator(ConstIterator const &it)
    Copy constructor.
    Definition: predicate_vector.h:186
    +
    CUTLASS_HOST_DEVICE TrivialPredicateTileAdapter()
    Ctor.
    Definition: predicate_vector.h:428
    +
    CUTLASS_HOST_DEVICE ConstIterator operator--(int)
    Post-decrement.
    Definition: predicate_vector.h:216
    +
    Adapter to enable random access to predicates via logical coordinate within a tile.
    Definition: predicate_vector.h:469
    +
    CUTLASS_DEVICE ConstPredicateTileAdapter(PredicateVector const &predicates_)
    Ctor.
    Definition: predicate_vector.h:481
    +
    Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
    +
    CUTLASS_HOST_DEVICE PredicateVector(bool value=true)
    Initialize the predicate vector.
    Definition: predicate_vector.h:340
    +
    CUTLASS_DEVICE Iterator begin()
    Returns an iterator to the start of the bit vector.
    Definition: predicate_vector.h:408
    +
    Basic include for CUTLASS macros.
    +
    An iterator implementing Predicate Iterator Concept enabling sequential read and write access to pred...
    Definition: predicate_vector.h:240
    +
    CUTLASS_HOST_DEVICE Iterator operator--(int)
    Post-decrement.
    Definition: predicate_vector.h:280
    +
    CUTLASS_HOST_DEVICE TrivialIterator()
    Constructor.
    Definition: predicate_vector.h:311
    +
    + + + + diff --git a/docs/generated-html/reshape__tile_8h.html b/docs/generated-html/reshape__tile_8h.html new file mode 100644 index 00000000..3712944a --- /dev/null +++ b/docs/generated-html/reshape__tile_8h.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: reshape_tile.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    reshape_tile.h File Reference
    +
    +
    + +

    Defines a type for restructuring a tile. +More...

    +
    #include <cutlass/shape.h>
    +
    +

    Go to the source code of this file.

    + + + + + + +

    +Classes

    struct  cutlass::ReshapeTile< Tile_, kAccessSize_, bool >
     
    struct  cutlass::ReshapeTile< Tile_, kAccessSize_, true >
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/reshape__tile_8h_source.html b/docs/generated-html/reshape__tile_8h_source.html new file mode 100644 index 00000000..bb7a1179 --- /dev/null +++ b/docs/generated-html/reshape__tile_8h_source.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: reshape_tile.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    reshape_tile.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/shape.h>
    31 
    32 namespace cutlass {
    33 
    35 
    36 // The following functor reshapes a tile of data. The goal is to have at least kAccessSize in
    37 // the inner-most dimension. If the user respects that constraint, there is nothing to be done. If
    38 // that's not the case, this functor will correct that and "extract" the right number of elements
    39 // from the next dimension.
    40 
    41 template <typename Tile_, int kAccessSize_, bool = (Tile_::kC < kAccessSize_)>
    42 struct ReshapeTile {
    43  typedef Tile_ Tile;
    44 };
    45 
    46 template <typename Tile_, int kAccessSize_>
    48  // Make sure the W dimension of the tile is large enough.
    49  static_assert(Tile_::kW >= kAccessSize_, "The W dimension is too small");
    50  // Make sure the dimension can be divided by the number of scalars.
    51  static_assert(Tile_::kW % kAccessSize_ == 0, "Not supported");
    52  // Collapse the W dimension.
    53  typedef Shape<Tile_::kD, Tile_::kH, Tile_::kW / kAccessSize_, kAccessSize_> Tile;
    54 };
    55 
    57 
    58 } // namespace cutlass
    Definition: convert.h:33
    + +
    #define static_assert(__e, __m)
    Definition: platform.h:145
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    Shape< Tile_::kD, Tile_::kH, Tile_::kW/kAccessSize_, kAccessSize_ > Tile
    Definition: reshape_tile.h:49
    +
    Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
    +
    + + + + diff --git a/docs/generated-html/search/all_0.html b/docs/generated-html/search/all_0.html new file mode 100644 index 00000000..5125b940 --- /dev/null +++ b/docs/generated-html/search/all_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_0.js b/docs/generated-html/search/all_0.js new file mode 100644 index 00000000..0165dcec --- /dev/null +++ b/docs/generated-html/search/all_0.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['_5f_5falign_5f_5f',['__align__',['../namespacecutlass_1_1platform.html#ac9068e2d027ffdf5cd564deecc2cb9e8',1,'cutlass::platform::__align__(1) aligned_chunk< 1 >'],['../namespacecutlass_1_1platform.html#a0bcb016704ec57f9499e662ba6156f98',1,'cutlass::platform::__align__(2) aligned_chunk< 2 >'],['../namespacecutlass_1_1platform.html#a71be5af25eeffa4077777f919e67d8da',1,'cutlass::platform::__align__(4) aligned_chunk< 4 >'],['../namespacecutlass_1_1platform.html#a42440254a16d4b6b95b95cc3360ee372',1,'cutlass::platform::__align__(8) aligned_chunk< 8 >'],['../namespacecutlass_1_1platform.html#a91d5e970d6ebe619914f40a9510bdb1e',1,'cutlass::platform::__align__(16) aligned_chunk< 16 >'],['../namespacecutlass_1_1platform.html#a210f4d360b1f9c3d074e71129fe4c0d9',1,'cutlass::platform::__align__(32) aligned_chunk< 32 >'],['../namespacecutlass_1_1platform.html#ae792b1c7ada1a33e306cd552f583bdce',1,'cutlass::platform::__align__(64) aligned_chunk< 64 >'],['../namespacecutlass_1_1platform.html#a5712ec4fed335a9b7f863fb3abe3c5eb',1,'cutlass::platform::__align__(128) aligned_chunk< 128 >'],['../namespacecutlass_1_1platform.html#a595cc98db29fb4d59772d2e2f52e347a',1,'cutlass::platform::__align__(256) aligned_chunk< 256 >'],['../namespacecutlass_1_1platform.html#ae70bb5d14a66500b47d2e3f83063d4a5',1,'cutlass::platform::__align__(512) aligned_chunk< 512 >'],['../namespacecutlass_1_1platform.html#a181e44e9c66f704175590727aaa9e5a1',1,'cutlass::platform::__align__(1024) aligned_chunk< 1024 >'],['../namespacecutlass_1_1platform.html#ae72c8fa997bb251d4140dceb03147154',1,'cutlass::platform::__align__(2048) aligned_chunk< 2048 >'],['../namespacecutlass_1_1platform.html#ada29683f1b408ae7b73cc8fbe2108628',1,'cutlass::platform::__align__(4096) aligned_chunk< 4096 
>'],['../namespacecutlass.html#ae6ee3d9361526f859d737d9c68c13706',1,'cutlass::__align__(1) AlignedStruct< 1 >'],['../namespacecutlass.html#a602227fad962270da185209ecc6012f2',1,'cutlass::__align__(2) AlignedStruct< 2 >'],['../namespacecutlass.html#a266d7d2ae6e79537e46ee37b4fdface7',1,'cutlass::__align__(4) AlignedStruct< 4 >'],['../namespacecutlass.html#a1101e01215ddb0e5a7b120a4541a3c4e',1,'cutlass::__align__(8) AlignedStruct< 8 >'],['../namespacecutlass.html#aa4071cf5103f352a5100d9b4bba895e2',1,'cutlass::__align__(16) AlignedStruct< 16 >'],['../namespacecutlass.html#ada65694bdd4b70d4c9d769a536275a47',1,'cutlass::__align__(32) AlignedStruct< 32 >'],['../namespacecutlass.html#aa80a7cb3febd19b96f2ecbcb610b1b9e',1,'cutlass::__align__(64) AlignedStruct< 64 >']]], + ['_5f_5fnv_5fstd_5fmax',['__NV_STD_MAX',['../platform_8h.html#abd31f291635329bc15292954f1f01d38',1,'platform.h']]], + ['_5f_5fnv_5fstd_5fmin',['__NV_STD_MIN',['../platform_8h.html#a39e234a3e3b0018b58df720bcb143420',1,'platform.h']]], + ['_5f_5fplatform_5fcat',['__platform_cat',['../platform_8h.html#aece7fe71be5aaf8d12dc9e2372f97de4',1,'platform.h']]], + ['_5f_5fplatform_5fcat_5f',['__platform_cat_',['../platform_8h.html#acd148999a5caeba8f6fd52e7e288e659',1,'platform.h']]] +]; diff --git a/docs/generated-html/search/all_1.html b/docs/generated-html/search/all_1.html new file mode 100644 index 00000000..b8ff8711 --- /dev/null +++ b/docs/generated-html/search/all_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_1.js b/docs/generated-html/search/all_1.js new file mode 100644 index 00000000..b1bf9916 --- /dev/null +++ b/docs/generated-html/search/all_1.js @@ -0,0 +1,31 @@ +var searchData= +[ + ['accesstype',['AccessType',['../structcutlass_1_1FragmentIterator.html#a012c5af3a8a40843c576c55ecbc663e7',1,'cutlass::FragmentIterator::AccessType()'],['../structcutlass_1_1FragmentConstIterator.html#addf5c21444f129211eefe7cdca6dfa1b',1,'cutlass::FragmentConstIterator::AccessType()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html#a0b656c41b9fff6402f33e95204ce8860',1,'cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html#a7eccab04c8d3968e74486d0525a3fa02',1,'cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html#abca5165caae7304f33fcad267c16b002',1,'cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html#a87d46956aa317f06f2ba9a535fdfc5da',1,'cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1Load.html#ad0bf2da0c240f3a2a3f4c92162d347ae',1,'cutlass::Load::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a5d7ed0abaeea99ec3399f8eea930f761',1,'cutlass::Load< Scalar_, 
Lanes_, Memory_, true, 4 >::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a2b9faed8d92f55a46e313d79d214316d',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::AccessType()'],['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#a8611550c045d6def964d9dafb2be80c6',1,'cutlass::Load< double, 2, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#a942970f88e13c88f496a9da67ed47a6f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Store.html#a8d2f927b2b61987dcea40e84f4575942',1,'cutlass::Store::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a89f329ba11f96ee3ce4428cbc792ac3d',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#ac0af6ae18137156abe24d6479232b955',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >::AccessType()'],['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#ad073f5e8252ad24b086f14bd2a109cf9',1,'cutlass::Store< double, 2, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#aeb70e4859e2795b6af63ad5e203b4da9',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1TileIteratorBase.html#abb3dde23971ad35a477b75ee99381b53',1,'cutlass::TileIteratorBase::AccessType()'],['../structcutlass_1_1TileLoadIterator.html#a4af8eeabe7c1ec0362782687a84466e0',1,'cutlass::TileLoadIterator::AccessType()'],['../structcutlass_1_1TileStoreIterator.html#a0e79ed59263ebc3478c43f2f9a50cb5a',1,'cutlass::TileStoreIterator::AccessType()']]], + 
['accumulators',['Accumulators',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#afe6bebd94e3379c94054d04c5196edce',1,'cutlass::gemm::GemmEpilogue::Accumulators()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#af7ff579ccb4269bfa5e9ae297260f7a2',1,'cutlass::gemm::GemmEpilogueTraits::Accumulators()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a2fadb0ad2e28109ccfa9195e817a4d54',1,'cutlass::gemm::GemmConfig::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a505306c2af2059f6e84ba32d701d1602',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a4712650b46b6183ea60d79ef18f55b86',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a760a5262f419b789540e7bbb2fda4b9d',1,'cutlass::gemm::ThreadMultiplyAdd::Accumulators()']]], + ['accumulatorsperthread',['AccumulatorsPerThread',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a98d0f84730551eaabfe7404b36478b50',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::AccumulatorsPerThread()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a47807c9c9fb43e7f7b5f409a49986c30',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerThread()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a002b1944b25cc8fe0862f40a8c8555c5',1,'cutlass::gemm::ThreadMultiplyAdd::AccumulatorsPerThread()']]], + 
['accumulatorsperwarp',['AccumulatorsPerWarp',['../structcutlass_1_1gemm_1_1GemmConfig.html#a51d583dfcd645ad0ecfc23b87b3c5108',1,'cutlass::gemm::GemmConfig::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#af0c856abdd9f7f26f671493cc629bf0a',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a327ce1b7b6478c27c80baf5d9e26bdbc',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#aa83190df3c1639b6dd632cd4b9278d77',1,'cutlass::gemm::ThreadMultiplyAdd::AccumulatorsPerWarp()']]], + ['additive',['Additive',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375a77d7cc80ec0c3ff42ca9b2aff98a1646',1,'cutlass::Identity']]], + ['advance',['advance',['../classcutlass_1_1TensorRef.html#aab0dafb81a462320e55e0dc4a5886478',1,'cutlass::TensorRef']]], + ['aligned_5f',['aligned_',['../unioncutlass_1_1Vector.html#a9e9352594fcd022526d5b69b6c25c99c',1,'cutlass::Vector::aligned_()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a9e41dbe541a7dddf1e461e0390fe8896',1,'cutlass::Vector< half, kLanes_ >::aligned_()']]], + ['aligned_5fchunk',['aligned_chunk',['../structcutlass_1_1platform_1_1aligned__chunk.html',1,'cutlass::platform']]], + ['aligned_5fstorage',['aligned_storage',['../structcutlass_1_1platform_1_1aligned__storage.html',1,'cutlass::platform']]], + ['alignedstruct',['AlignedStruct',['../structcutlass_1_1AlignedStruct.html',1,'cutlass']]], + ['alignedstruct_3c_20kvectorsize_20_3e',['AlignedStruct< kVectorSize >',['../structcutlass_1_1AlignedStruct.html',1,'cutlass']]], + 
['alignment_5fof',['alignment_of',['../structcutlass_1_1platform_1_1alignment__of.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20const_20value_5ft_20_3e',['alignment_of< const value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20const_20volatile_20value_5ft_20_3e',['alignment_of< const volatile value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20double2_20_3e',['alignment_of< double2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20double4_20_3e',['alignment_of< double4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20float4_20_3e',['alignment_of< float4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20int4_20_3e',['alignment_of< int4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20long4_20_3e',['alignment_of< long4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20longlong2_20_3e',['alignment_of< longlong2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20longlong4_20_3e',['alignment_of< longlong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20uint4_20_3e',['alignment_of< uint4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20ulong4_20_3e',['alignment_of< ulong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html',1,'cutlass::platform']]], + 
['alignment_5fof_3c_20ulonglong2_20_3e',['alignment_of< ulonglong2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20ulonglong4_20_3e',['alignment_of< ulonglong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20volatile_20value_5ft_20_3e',['alignment_of< volatile value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html',1,'cutlass::platform']]], + ['alpha',['alpha',['../structcutlass_1_1gemm_1_1GemmDesc.html#a053c2b529be527f510ee317737fbf7e8',1,'cutlass::gemm::GemmDesc::alpha()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a3248d6b3d9bcc59365d582b879292a70',1,'cutlass::gemm::LinearScaling::Params::alpha()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#ab9c51c8b1f06e935a353ac5b1c22cee6',1,'cutlass::gemm::LinearScaling::alpha()']]], + ['at',['at',['../structcutlass_1_1Coord.html#ad10b59430927a354fcd874d2d32f1bd8',1,'cutlass::Coord::at()'],['../structcutlass_1_1Coord.html#ab511a16210d1b94449f5bc6476f6a266',1,'cutlass::Coord::at(int dim)'],['../structcutlass_1_1Coord.html#af9cc7ab2088544d1240ac51c4c6e685d',1,'cutlass::Coord::at() const'],['../structcutlass_1_1Coord.html#aed4f4d1c7c0749fe72736d7a1213b6e9',1,'cutlass::Coord::at(int dim) const'],['../structcutlass_1_1FragmentIterator.html#a9cf31df06ff035705a1341810fcdcbf2',1,'cutlass::FragmentIterator::at(int d, int h, int w, int c=0) const'],['../structcutlass_1_1FragmentIterator.html#a7bdc407aae8d7360e089af347b585a53',1,'cutlass::FragmentIterator::at(int d, int h, int w, int 
c=0)'],['../structcutlass_1_1FragmentConstIterator.html#a8b957150545becacab1b8ead1be29424',1,'cutlass::FragmentConstIterator::at()'],['../structcutlass_1_1PredicateVector.html#ac8eca7087d1f7575b0c6beeb5f907bfd',1,'cutlass::PredicateVector::at()'],['../structcutlass_1_1TrivialPredicateTileAdapter.html#a3e41ab145489df08fca79251b2253d0f',1,'cutlass::TrivialPredicateTileAdapter::at()'],['../structcutlass_1_1PredicateTileAdapter.html#a7d54e877bca2e840c142293b4826e986',1,'cutlass::PredicateTileAdapter::at()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#a9e5651009a7b8df9960527c18c7b05dd',1,'cutlass::ConstPredicateTileAdapter::at()'],['../classcutlass_1_1TensorRef.html#a7eff42a37e4dbee488bfa726f3f0df4f',1,'cutlass::TensorRef::at(Coord< Rank > const &coord) const'],['../classcutlass_1_1TensorRef.html#a5702dea703104ab431c098c7b039c215',1,'cutlass::TensorRef::at(int idx) const'],['../classcutlass_1_1TensorView.html#ad894a8b373c413d308cb1b7c7ba545ce',1,'cutlass::TensorView::at(Coord_t const &coord) const'],['../classcutlass_1_1TensorView.html#acc55581896fae8c0449b44b56d750155',1,'cutlass::TensorView::at(Offset_t idx) const']]] +]; diff --git a/docs/generated-html/search/all_10.html b/docs/generated-html/search/all_10.html new file mode 100644 index 00000000..50bc449e --- /dev/null +++ b/docs/generated-html/search/all_10.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_10.js b/docs/generated-html/search/all_10.js new file mode 100644 index 00000000..19828a38 --- /dev/null +++ b/docs/generated-html/search/all_10.js @@ -0,0 +1,20 @@ +var searchData= +[ + ['rank',['Rank',['../classcutlass_1_1TensorRef.html#a22ac53a60e63a743613e732586ad0c66',1,'cutlass::TensorRef::Rank()'],['../classcutlass_1_1TensorView.html#a22c39e8cf314884c5d523914cf4cac90',1,'cutlass::TensorView::Rank()']]], + ['ref',['ref',['../classcutlass_1_1TensorView.html#a8650860460ea24944c803a671095be09',1,'cutlass::TensorView::ref()'],['../classcutlass_1_1TensorView.html#a5cbff89d3d8dc71d27a4d6c1d7abb58a',1,'cutlass::TensorView::ref() const']]], + ['registers',['registers',['../unioncutlass_1_1Vector.html#a29dab07949206cc1609543ffcefd1e5a',1,'cutlass::Vector::registers()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#abd116dc7a5b82ac9b1481fb1d2bfc93f',1,'cutlass::Vector< half, kLanes_ >::registers()']]], + ['release',['release',['../classcutlass_1_1platform_1_1unique__ptr.html#a7ac06ebe7bc66573d3225891e12d2279',1,'cutlass::platform::unique_ptr']]], + ['remove_5fconst',['remove_const',['../structcutlass_1_1platform_1_1remove__const.html',1,'cutlass::platform']]], + ['remove_5fconst_3c_20const_20t_20_3e',['remove_const< const T >',['../structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html',1,'cutlass::platform']]], + ['remove_5fcv',['remove_cv',['../structcutlass_1_1platform_1_1remove__cv.html',1,'cutlass::platform']]], + ['remove_5fvolatile',['remove_volatile',['../structcutlass_1_1platform_1_1remove__volatile.html',1,'cutlass::platform']]], + ['remove_5fvolatile_3c_20volatile_20t_20_3e',['remove_volatile< volatile T >',['../structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + 
['reset',['reset',['../classcutlass_1_1TensorRef.html#abefe392e81da2c09cb127f963ae90674',1,'cutlass::TensorRef::reset()'],['../classcutlass_1_1TensorView.html#a8b1785a1ea5d7aa7eba8e45297d539d3',1,'cutlass::TensorView::reset()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a6740f71511f5495d6038cf8878862331',1,'cutlass::platform::unique_ptr::reset()']]], + ['reshape_5ftile_2eh',['reshape_tile.h',['../reshape__tile_8h.html',1,'']]], + ['reshapethreads',['ReshapeThreads',['../structcutlass_1_1gemm_1_1ReshapeThreads.html',1,'cutlass::gemm']]], + ['reshapethreads_3c_20tile_5f_2c_20threads_5f_2c_20true_20_3e',['ReshapeThreads< Tile_, Threads_, true >',['../structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html',1,'cutlass::gemm']]], + ['reshapetile',['ReshapeTile',['../structcutlass_1_1ReshapeTile.html',1,'cutlass']]], + ['reshapetile_3c_20tile_5f_2c_20kaccesssize_5f_2c_20true_20_3e',['ReshapeTile< Tile_, kAccessSize_, true >',['../structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html',1,'cutlass']]], + ['residue',['residue',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#aae1adef6312e069e59a83d38c03116f9',1,'cutlass::gemm::GlobalLoadStreamBase::residue()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#aab37ea6c47e34466371314ed3971dc7b',1,'cutlass::gemm::GemmGlobalIteratorAb::residue()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a405b93680bb6e356369863244d0b56aa',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::residue()']]], + ['round_5fnearest',['round_nearest',['../namespacecutlass.html#a17c8c408d672d26f1c70d2435f6ac83e',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/all_11.html b/docs/generated-html/search/all_11.html new file mode 100644 index 00000000..b35c8bf0 --- /dev/null +++ b/docs/generated-html/search/all_11.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_11.js b/docs/generated-html/search/all_11.js new file mode 100644 index 00000000..4f0bed2e --- /dev/null +++ b/docs/generated-html/search/all_11.js @@ -0,0 +1,89 @@ +var searchData= +[ + ['scalar',['Scalar',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a0d38914bf97084e04102e7897aee4295',1,'cutlass::gemm::GemmEpilogue::Scalar()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a006e50cf5fb67407d41c60d6d08b8b66',1,'cutlass::gemm::GemmEpilogueTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ae2b82b9b62aefa15005091bb84ac20e8',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Scalar()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afbbf15a7b5e4c38e59bf1debf67f04d6',1,'cutlass::gemm::GlobalLoadStreamBase::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a6894b653fffa59bcb847bc3295643d6b',1,'cutlass::gemm::GemmGlobalTileTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a5817b81c7013db9a3f7394ad4b1db79a',1,'cutlass::gemm::GemmGlobalIteratorAb::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6b5b207eb1147e9669215e192901df9e',1,'cutlass::gemm::GemmGlobalIteratorCd::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a8b04fd003fc2db46d749360e8838438b',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#aaa439a0bb6b9de5e2722ea7b011effea',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a1b6956adc65254202864520b668edd14',1,'cutlass::gemm::GemmSharedLoadTileATraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a2a6065e583155b3e389253d3bfb64d73',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a9a2218b570dada2f1e3ccd8004c47856',1,'cutlass::gemm::GemmSharedSt
oreTileDTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a1b025cb056729706f36469e74a9799dc',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af511f0ff83166b2a77d4cad4150c8e8f',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ac618881d66790e4c280dc5692e5ddf95',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a8ae7db3f2f0c57779729d500386c004c',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a7639ccd7f6419a9f232db173a228e756',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ae4128bba3f1df6ef7824e2db79745b00',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html#ab1068ba72468f9ede1d05ba41ea31317',1,'cutlass::gemm::IgemmEpilogueScalar::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html#a0983fd25494f6a7ed5af37a02e99f650',1,'cutlass::gemm::IgemmEpilogueScalar< int 
>::Scalar()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#ae6b053ca059932f7c0d3c99243854183',1,'cutlass::gemm::LinearScaling::Scalar()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab9979f3f1f6d31e1466780c5777de25e',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Scalar()'],['../structcutlass_1_1TileIteratorBase.html#a17163e93d7d3616b4950925f72bb4c16',1,'cutlass::TileIteratorBase::Scalar()'],['../structcutlass_1_1TileLoadIterator.html#ae8dff52e619f06fbdbca8cb847c79895',1,'cutlass::TileLoadIterator::Scalar()'],['../structcutlass_1_1TileStoreIterator.html#ad52318b430437575b55099ca992ca3a7',1,'cutlass::TileStoreIterator::Scalar()'],['../unioncutlass_1_1Vector.html#a56875d7cbf921261e68e1f63212db5bd',1,'cutlass::Vector::Scalar()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a03199df1287d263f7267239c014f1d9b',1,'cutlass::Vector< half, kLanes_ >::Scalar()'],['../structcutlass_1_1VectorTraits.html#ab3b49d7fb52050c13e50e3c75bf72599',1,'cutlass::VectorTraits::Scalar()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aaf35570b10829356762dcec925a5b4bc',1,'cutlass::VectorTraits< Vector< T, Lanes > >::Scalar()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a6e99dde8432b13472971dc41573a574e',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::Scalar()']]], + ['scalara',['ScalarA',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a6fa76b3e7ac721d47df47eba4e9ef222',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarA()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a366083b229b28e7f44da38273b2ab263',1,'cutlass::gemm::FragmentMultiplyAdd< half 
>::ScalarA()'],['../structcutlass_1_1gemm_1_1Gemm.html#a6fcf9daef57558e1bb932c6eba99721b',1,'cutlass::gemm::Gemm::ScalarA()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a9d1e4e364be8fd9de5e1199d93ad76aa',1,'cutlass::gemm::GemmConfig::ScalarA()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a96d64bdc48db4971798b620d6b49b3f6',1,'cutlass::gemm::GemmTraits::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a236a408791a38358cbadf19dd0e8ed9f',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#aeef5fa0437b4ce1c2e8ac4bc7e062b65',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a382242001b4c8e18ea5f2de724902217',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarA()']]], + ['scalarb',['ScalarB',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#af4f5c4a79c447e5aaf313878eca022cb',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarB()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#af52ec4b92a3e788169764014aebb85a1',1,'cutlass::gemm::FragmentMultiplyAdd< half >::ScalarB()'],['../structcutlass_1_1gemm_1_1Gemm.html#ae6f11bb666c2c8510e99200a2c0fc2f4',1,'cutlass::gemm::Gemm::ScalarB()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#aa13d6f5e5ad907ef09c88ae49e6e8e9b',1,'cutlass::gemm::GemmConfig::ScalarB()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#aa0e8fd28f5247764dfb7843f7670c698',1,'cutlass::gemm::GemmTraits::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#ac7557562de1108bf1abc10829c83e88f',1,'cutlass::gemm::ThreadMultiplyAdd< 
AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#aaf9e4b8b16150a6ad826c228af2bf103',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a42d181e7f4d0d0a15e1c911d3498b767',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarB()']]], + ['scalarc',['ScalarC',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a92c1ffbfb479cd9fa2c2632ef8e347d3',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarC()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#af553be8ef0b4dc9bb593d98dfce8628d',1,'cutlass::gemm::FragmentMultiplyAdd< half >::ScalarC()'],['../structcutlass_1_1gemm_1_1Gemm.html#a71f0c91768a1a87e94030c8c2db51e55',1,'cutlass::gemm::Gemm::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#abb0741601652df8fdf927d49c2c0e4d0',1,'cutlass::gemm::GemmEpilogue::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#abf97949c238d72854225c1c6131b5cbc',1,'cutlass::gemm::GemmEpilogueTraits::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#ad8f262d7da093d07cdd5c6a4fd9aceea',1,'cutlass::gemm::GemmConfig::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a8f78d4a68817760099081523aa7fd443',1,'cutlass::gemm::GemmTraits::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#af1a6d91d4734683ea791bf57f3c3bbb0',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#acdd554e996a712ff62eb70d6ecf8e116',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, 
int8_t, int >::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a1af758cb98c33060462a2706856b0a01',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarC()']]], + ['scalard',['ScalarD',['../structcutlass_1_1gemm_1_1Gemm.html#ae2aa3663f9f6f5708e816dcf7cd66694',1,'cutlass::gemm::Gemm::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a4887b56a96694ce6350db77f78bb505f',1,'cutlass::gemm::GemmEpilogue::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a1ee74d6f89b044578e1cd6dd210ce5fe',1,'cutlass::gemm::GemmEpilogueTraits::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a188ef7f4c49ff2830753218343a1b8f8',1,'cutlass::gemm::GemmConfig::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a3129be75ee087603170f8367e10e070e',1,'cutlass::gemm::GemmTraits::ScalarD()']]], + ['scalarepilogue',['ScalarEpilogue',['../structcutlass_1_1gemm_1_1Gemm.html#a9349fc5f20215c1c6508e250b0b4e936',1,'cutlass::gemm::Gemm']]], + ['scalars',['scalars',['../unioncutlass_1_1Vector.html#a091080b4e9db9e89734f44ceb985d78f',1,'cutlass::Vector::scalars()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#ab4a119a4813f80aa10c25e32f8b115f3',1,'cutlass::Vector< half, kLanes_ >::scalars()']]], + ['set',['set',['../classcutlass_1_1PredicateVector_1_1Iterator.html#aadfd039b5622098c9e46706a27122575',1,'cutlass::PredicateVector::Iterator::set()'],['../structcutlass_1_1PredicateVector.html#a062fa8a8df725ef08ced2ffcca8336af',1,'cutlass::PredicateVector::set()'],['../structcutlass_1_1PredicateTileAdapter.html#aeda47efdda0387f9c3c7b31f836afca5',1,'cutlass::PredicateTileAdapter::set()']]], + ['sgemm_5ftraits_2eh',['sgemm_traits.h',['../sgemm__traits_8h.html',1,'']]], + ['sgemmconfig',['SgemmConfig',['../structcutlass_1_1gemm_1_1SgemmConfig.html',1,'cutlass::gemm']]], + ['sgemmtraits',['SgemmTraits',['../structcutlass_1_1gemm_1_1SgemmTraits.html',1,'cutlass::gemm']]], + ['shape',['Shape',['../structcutlass_1_1Shape.html',1,'cutlass::Shape< kD_, kH_, kW_, kC_ 
>'],['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a89f1d9599b418c8bb81c104ca86cf00e',1,'cutlass::gemm::GemmMultiplicandTraits::Shape()'],['../structcutlass_1_1ShapeScale.html#aae9cfc35c517cd89018e4f914acbac29',1,'cutlass::ShapeScale::Shape()'],['../structcutlass_1_1ShapeAdd.html#ad4712a1339445038949445de1dd74e71',1,'cutlass::ShapeAdd::Shape()'],['../structcutlass_1_1ShapeSub.html#a24b6dd8cb6171b85c4e2f37407f9a5c9',1,'cutlass::ShapeSub::Shape()'],['../structcutlass_1_1ShapeMul.html#a8875fc5e861339f981360ed774e8cc94',1,'cutlass::ShapeMul::Shape()'],['../structcutlass_1_1ShapeDiv.html#a108ded386ef6708afc6fe769a77a234b',1,'cutlass::ShapeDiv::Shape()'],['../structcutlass_1_1ShapeMax.html#ad566aceac2563024982eeabb78c6c961',1,'cutlass::ShapeMax::Shape()'],['../structcutlass_1_1ShapeMin.html#a5c813e4c34ea612431d31b36120f8549',1,'cutlass::ShapeMin::Shape()'],['../structcutlass_1_1ShapeStrides.html#ac6fcda9b8e1782f24c1e6d67cd880a6a',1,'cutlass::ShapeStrides::Shape()']]], + ['shape_2eh',['shape.h',['../shape_8h.html',1,'']]], + ['shapeadd',['ShapeAdd',['../structcutlass_1_1ShapeAdd.html',1,'cutlass']]], + ['shapecount',['ShapeCount',['../structcutlass_1_1ShapeCount.html',1,'cutlass']]], + ['shapediv',['ShapeDiv',['../structcutlass_1_1ShapeDiv.html',1,'cutlass']]], + ['shapemax',['ShapeMax',['../structcutlass_1_1ShapeMax.html',1,'cutlass']]], + ['shapemin',['ShapeMin',['../structcutlass_1_1ShapeMin.html',1,'cutlass']]], + ['shapemul',['ShapeMul',['../structcutlass_1_1ShapeMul.html',1,'cutlass']]], + ['shapescale',['ShapeScale',['../structcutlass_1_1ShapeScale.html',1,'cutlass']]], + ['shapestrides',['ShapeStrides',['../structcutlass_1_1ShapeStrides.html',1,'cutlass']]], + ['shapesub',['ShapeSub',['../structcutlass_1_1ShapeSub.html',1,'cutlass']]], + ['shared',['shared',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html#afabd328b106d45b156200f73942d211e',1,'cutlass::gemm::GemmTraits::StreamSharedStorage']]], + 
['shared_5fiterator_5fload',['shared_iterator_load',['../namespacecutlass.html#abcec976c59cab75ca55b338d125154a3',1,'cutlass::shared_iterator_load(InputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#aa9416026c6db08d92a34c2ac08fea8c3',1,'cutlass::shared_iterator_load(InputIterator &iterator, Fragment &fragment, int d)']]], + ['shared_5fiterator_5fstore',['shared_iterator_store',['../namespacecutlass.html#a705c6d75513e112d2731d1c40f4cf109',1,'cutlass']]], + ['shared_5fload_5ffence',['shared_load_fence',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9b5e42f222fec98ff479bc1650221b84',1,'cutlass::gemm::GemmEpilogue::shared_load_fence()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a475463c1e3af71598e22da8956900ebe',1,'cutlass::gemm::GemmTraits::shared_load_fence()']]], + ['shared_5fload_5fiterator_5fd',['shared_load_iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a1742e43c128665f0ca39cb578291df81',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['shared_5fstorage',['shared_storage',['../structcutlass_1_1gemm_1_1Gemm.html#a6b0119ed8d92698dab4de68987c8cc1b',1,'cutlass::gemm::Gemm::shared_storage()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a442b5b5688cd658c3b3476650c00281e',1,'cutlass::gemm::GemmEpilogue::shared_storage()']]], + ['shared_5fstore_5ffence',['shared_store_fence',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac1b2a16b4ccf3e9617faf4d8a2c43691',1,'cutlass::gemm::GemmEpilogue::shared_store_fence()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#ac3c840a3d90c0da43301761af83c2c9f',1,'cutlass::gemm::GemmTraits::shared_store_fence()']]], + ['shared_5fstore_5fiterator_5fd',['shared_store_iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#af79a0c74a4c30ccec59b393721b5dfc1',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + 
['shared_5fstream',['shared_stream',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html#ae63b5a52106dbd37ea304196335ec210',1,'cutlass::gemm::GemmEpilogueTraits::SharedStorage']]], + ['shared_5fstream_5fa',['shared_stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aa9937ec51d18aad02398d95095117978',1,'cutlass::gemm::GemmTraits::Params']]], + ['shared_5fstream_5fb',['shared_stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a78f22007632937bbd5f3dab7b097477d',1,'cutlass::gemm::GemmTraits::Params']]], + ['sharedloaditeratora',['SharedLoadIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a365aed4c0e2ad1bffea517ee36998557',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a1bbb198a50b5f01a0502df44bb678620',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#aa93043ac87d89ce7fb991c9195c3bf99',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadIteratorA()']]], + ['sharedloaditeratorb',['SharedLoadIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a4de905aadc734df69fd0db83f01be56e',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a8d09409973094ca2a17633776a64a303',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a42322b9b10e894fe157e527b378c59f8',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadIteratorB()']]], + 
['sharedloaditeratord',['SharedLoadIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a4a0b439f8a57d8e67174ecbd96183070',1,'cutlass::gemm::GemmEpilogue::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a9822fa405b32cc2f471c9fdd37585cb5',1,'cutlass::gemm::GemmEpilogueTraits::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#adbff60de6f90ef4d5ae0c7096692e2c0',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad33ee44527a7fcfd41b4e677927fd4fa',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedLoadIteratorD()']]], + ['sharedloadstream',['SharedLoadStream',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html',1,'cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a6e097738679436d580e8dc6ac70efaad',1,'cutlass::gemm::SharedLoadStream::SharedLoadStream()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a93e9bcdca4ceb68754fb1f73e2b25d25',1,'cutlass::gemm::SharedLoadStream::SharedLoadStream(Params const &params, SharedStorage &shared_storage)'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a49315aea1c54d84ff19b0ac215128b95',1,'cutlass::gemm::GemmTraits::SharedLoadStream::SharedLoadStream()']]], + 
['sharedloadstreama',['SharedLoadStreamA',['../structcutlass_1_1gemm_1_1GemmTraits.html#ae01371eb31b88fa83c4926564cecafdc',1,'cutlass::gemm::GemmTraits::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#aa5ebe3a857b55412a86ec65ad1c55dd8',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a21c860cc877df13d22dd30eeb5e2b06b',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a70063eb7e19921efef55a6f32562773f',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadStreamA()']]], + ['sharedloadstreamb',['SharedLoadStreamB',['../structcutlass_1_1gemm_1_1GemmTraits.html#acaeb27063a444e2a3b93f3cb70e3c290',1,'cutlass::gemm::GemmTraits::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a12447ce4d11601a625662f9d177cc3d8',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ac5eeca1e91f0e0d4dd48d432d5213215',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a54e8ad5874306a3764951a9791f02c96',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadStreamB()']]], + ['sharedloadtiletraits',['SharedLoadTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ab8ba28fd1da48fcabbafc0de91281b46',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af534fc5698513af3c6724b68ae03316d',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a1125408805bc697755f2b16594c6c8e1',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ 
>::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a118bb34a6f58c3e5a989773b4b597d8c',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a9335aca8b152ff1167763de8ff8fb882',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a458cbcc16fc296d024f2a1a95fb926c1',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af1bc7f7c26db3399201cd95f35a56790',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a851113bffb5b656c5c649845852b3b8d',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedLoadTileTraits()']]], + ['sharedloadtransformerd',['SharedLoadTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a132cabbc1402c87c7b35dea427001a13',1,'cutlass::gemm::GemmEpilogue']]], + ['sharedstorage',['SharedStorage',['../structcutlass_1_1gemm_1_1ClearAccumulators_1_1SharedStorage.html',1,'cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorage'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ 
>::SharedStorage'],['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html',1,'cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage'],['../structcutlass_1_1gemm_1_1Gemm.html#ad10627d508fad0efae1fb91b26d7a6b7',1,'cutlass::gemm::Gemm::SharedStorage()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac36dad8a7b6bc7fc6ef88e44068468dc',1,'cutlass::gemm::GemmEpilogue::SharedStorage()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a22c671494d487511c71f2b0f26fdb404',1,'cutlass::gemm::SharedLoadStream::SharedStorage()'],['../structcutlass_1_1TileLoadIterator.html#ab457bd7953af9ef418510f55f52d1f39',1,'cutlass::TileLoadIterator::SharedStorage()'],['../structcutlass_1_1TileStoreIterator.html#ab7922305d47b67e6cfb439e4e8d9f09b',1,'cutlass::TileStoreIterator::SharedStorage()']]], + ['sharedstorefragmentd',['SharedStoreFragmentD',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a5e64440830b36899f9c0ed8b369665c8',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['sharedstoreiteratora',['SharedStoreIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a3a20852daeb46c625b2391d078b30d73',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedStoreIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a7f022d423d42d4081cefa7eb26b4d5b4',1,'cutlass::gemm::HgemmTraitsHelper::SharedStoreIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ae187303a8da63f36960687a4730f4c46',1,'cutlass::gemm::IgemmTraitsHelper::SharedStoreIteratorA()']]], + 
['sharedstoreiteratorb',['SharedStoreIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a43713f534798b1e27c4ba38b72e63c08',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedStoreIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#abe3383e7338c08841fd8f0bfb1090448',1,'cutlass::gemm::HgemmTraitsHelper::SharedStoreIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a4d6658f3a3b53760b10a3da9c807b81f',1,'cutlass::gemm::IgemmTraitsHelper::SharedStoreIteratorB()']]], + ['sharedstoreiteratord',['SharedStoreIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#aab0a964efe223c5c29bc816c393b5a9a',1,'cutlass::gemm::GemmEpilogue::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a74f4beb86447f6b613e9b60234cb27bc',1,'cutlass::gemm::GemmEpilogueTraits::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a02a517fd246fb961727d3bd1b4f954be',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#af7024128202d642d3535e1ae5cf5f43d',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreIteratorD()']]], + ['sharedstorestorage',['SharedStoreStorage',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a69092e298d5723028fc24235d72f87fa',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['sharedstorestoragea',['SharedStoreStorageA',['../structcutlass_1_1gemm_1_1GemmTraits.html#a8d49ad32fc9d8c14f6141690962c3f9c',1,'cutlass::gemm::GemmTraits']]], + ['sharedstorestorageb',['SharedStoreStorageB',['../structcutlass_1_1gemm_1_1GemmTraits.html#a438b80cd8d8df0e74014ae47a162f7ed',1,'cutlass::gemm::GemmTraits']]], + 
['sharedstoretiletraits',['SharedStoreTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a3a0fb3a914bfd009ff2e3918bcd231a9',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#aaa198fed841af6bf26bf2e9544d0a877',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ad6511b7c2d84a9f6c3ed3639269ac44f',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a1884cbc21987aec651fa8149d4ed1a06',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#acbeea56f0ce95ddd632db3482c1021e5',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a2aad3b2454d956f20dac1bb0ad75a2f8',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ab1ae3d51f65f7af60147da1c51a7a0c2',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ 
>::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad7659dc0eaa491447ad127ef7098924f',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a7624585480f83a46725c92b5dee20ebc',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aca6118b5bbe6f667f05c53bd52543045',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()']]], + ['sharedstoretransformerd',['SharedStoreTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9063e7fc044a679652d5a3a31aa77e7c',1,'cutlass::gemm::GemmEpilogue::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a0b8ac1972b2f2cff48070f8b862ed25c',1,'cutlass::gemm::GemmEpilogueTraits::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aa5cea8dbebda9a12a503ae1416c4da33',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a00000e0cd14b9e6e242eafb5133af8cf',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreTransformerD()']]], + ['simplifiedgemmepiloguetraits',['SimplifiedGemmEpilogueTraits',['../structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraits',['SimplifiedGemmTraits',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraits_3c_20klayouta_5f_2c_20klayoutb_5f_2c_20gemmconfig_5f_2c_20gemmepilogue_3c_20gemmepiloguetraits_5f_20_3e_2c_20index_5f_20_3e',['SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, GemmEpilogue< GemmEpilogueTraits_ >, Index_ 
>',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraitshelper',['SimplifiedGemmTraitsHelper',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html',1,'cutlass::gemm']]], + ['size',['size',['../classcutlass_1_1TensorView.html#a541a7c22e7109d4059044f146fe69027',1,'cutlass::TensorView::size() const'],['../classcutlass_1_1TensorView.html#a6218d8555679966eab784a6bb1fa4ed1',1,'cutlass::TensorView::size(int dim) const']]], + ['skew',['Skew',['../structcutlass_1_1TileIteratorBase.html#ae89afbcf642b3023770ff22969c51d16',1,'cutlass::TileIteratorBase::Skew()'],['../structcutlass_1_1TileLoadIterator.html#a11ec4297c9a1352c8005ac222892b35c',1,'cutlass::TileLoadIterator::Skew()'],['../structcutlass_1_1TileStoreIterator.html#a57348779bb004ed1ea0fd9cc252e895d',1,'cutlass::TileStoreIterator::Skew()']]], + ['sqrt_5fest',['sqrt_est',['../structcutlass_1_1sqrt__est.html',1,'cutlass']]], + ['stage',['stage',['../structcutlass_1_1TileLoadIterator.html#aa3fd9859de68d76e07ebee06c6ccee92',1,'cutlass::TileLoadIterator::stage()'],['../structcutlass_1_1TileStoreIterator.html#ae435b72b15eca46eb871446d92bd316e',1,'cutlass::TileStoreIterator::stage()']]], + ['static_5fassert',['static_assert',['../platform_8h.html#adde4c9ea91b753491851361a4198c009',1,'platform.h']]], + ['storage',['Storage',['../structcutlass_1_1PredicateVector.html#afe85a07b9f311327c6bf04e3a5f94e5a',1,'cutlass::PredicateVector::Storage()'],['../classcutlass_1_1TensorRef.html#a604921388cb7ee18ddb8127b8ca2f7fd',1,'cutlass::TensorRef::Storage()'],['../structcutlass_1_1TileIteratorBase.html#a6ca47fd6e2f9cbb3498c138417ea414a',1,'cutlass::TileIteratorBase::Storage()']]], + ['storagetype',['StorageType',['../structcutlass_1_1StorageType.html',1,'cutlass']]], + ['storagetype_3c_201_20_3e',['StorageType< 1 >',['../structcutlass_1_1StorageType_3_011_01_4.html',1,'cutlass']]], + ['storagetype_3c_202_20_3e',['StorageType< 2 
>',['../structcutlass_1_1StorageType_3_012_01_4.html',1,'cutlass']]], + ['storagetype_3c_204_20_3e',['StorageType< 4 >',['../structcutlass_1_1StorageType_3_014_01_4.html',1,'cutlass']]], + ['store',['Store',['../structcutlass_1_1Store.html',1,'cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >'],['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html#a1f31090613c4e6f0895f598880d6c4e5',1,'cutlass::gemm::GemmEpilogueTraits::StreamSharedStorage::store()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html#a118c78aa6b0ae0f0c78889689b6878c8',1,'cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html#a45319520b7d341c66bd54d3e8fec48f8',1,'cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store()'],['../structcutlass_1_1Store.html#a1117fa7b7bdeeb3a7f2d647a1d340aaf',1,'cutlass::Store::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a00f6bb93d318bf4cff35c9dabc630167',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a027980b8456243974b0c442866a66e3a',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >::store()'],['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#ab70d04589637f285f861902f649f834e',1,'cutlass::Store< double, 2, Memory_, true, 16 >::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#aa130564bb2eba7b07e1f183c98f1d9e2',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 
>::store()'],['../structcutlass_1_1TileStoreIterator.html#a53820de506cecb1f5fb07b3385d8272a',1,'cutlass::TileStoreIterator::store(Fragment &fragment, PredicateIterator pred_it) const'],['../structcutlass_1_1TileStoreIterator.html#a60258b7c1a1708f97e28f8f6c292bfe4',1,'cutlass::TileStoreIterator::store(Fragment &fragment) const']]], + ['store_3c_20double_2c_202_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Store< double, 2, Memory_, true, 16 >',['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 16 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_204_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 4 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_208_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 8 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html',1,'cutlass']]], + ['store_5fiterator',['store_iterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a3e5167fa3f2dc0d8b4b903bd4e936969',1,'cutlass::gemm::GlobalLoadStreamBase::Params::store_iterator()'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html#a939e9ddecc5ee97882a54211a61f5586',1,'cutlass::gemm::GlobalLoadStreamBase::SharedStorage::store_iterator()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0eafd1e245946bd1b9d228ad7d2d0dae',1,'cutlass::gemm::GlobalLoadStreamBase::store_iterator()']]], + ['store_5fpost_5fincrement',['store_post_increment',['../structcutlass_1_1TileStoreIterator.html#a57aa2c36eb6ad9d2500c1f5396b3a526',1,'cutlass::TileStoreIterator::store_post_increment(Fragment 
&fragment, PredicateIterator pred_it)'],['../structcutlass_1_1TileStoreIterator.html#ae63949f58c1b32959bbfa5b64d521f0f',1,'cutlass::TileStoreIterator::store_post_increment(Fragment &fragment)']]], + ['storeiterator',['StoreIterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a15eee5bf6367a36a5b5c8024437f4834',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['stream_5fa',['stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a62d3dcf5d97a0a896b2033e55dfb0811',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage::stream_a()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a82a59524b5d3134eb609d280193a5c47',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::stream_a()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a8e68561561ac6b08efbfd116903198c8',1,'cutlass::gemm::GemmTraits::SharedLoadStream::stream_a()']]], + ['stream_5fb',['stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a0173fcc8856b17a52cc5eee845f101fa',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage::stream_b()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#acc287ce5e2f3635d9d55d91914d2d04c',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::stream_b()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a1fdc6af44c14c88a94529d187fda176d',1,'cutlass::gemm::GemmTraits::SharedLoadStream::stream_b()']]], + ['streamsharedstorage',['StreamSharedStorage',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >'],['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, 
GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage']]], + ['streamsharedstorage_3c_20globalloadstreama_2c_20sharedloadstreama_20_3e',['StreamSharedStorage< GlobalLoadStreamA, SharedLoadStreamA >',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits']]], + ['streamsharedstorage_3c_20globalloadstreamb_2c_20sharedloadstreamb_20_3e',['StreamSharedStorage< GlobalLoadStreamB, SharedLoadStreamB >',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits']]], + ['stride',['stride',['../classcutlass_1_1TensorRef.html#a89380141d25528c4c7ba6c365b96a878',1,'cutlass::TensorRef::stride() const'],['../classcutlass_1_1TensorRef.html#af47f192552544272774a29d7a0829a31',1,'cutlass::TensorRef::stride(int dim) const'],['../classcutlass_1_1TensorView.html#a3ac125a25199fd91f73d2cfe9fc3d09b',1,'cutlass::TensorView::stride() const'],['../classcutlass_1_1TensorView.html#a522630bb0df977282a9bff17e6fee843',1,'cutlass::TensorView::stride(int dim) const']]], + ['stride_5fd',['stride_d',['../structcutlass_1_1TileIteratorBase_1_1Params.html#ad67234ec264354a22032bb2519575dc1',1,'cutlass::TileIteratorBase::Params']]], + ['stride_5fh',['stride_h',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#ae0fdc7426b22ff2c20f077e251ebc823',1,'cutlass::gemm::GemmEpilogueTraits::Params::stride_h()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a0c6b03c635e14ad4424a83f8c7f8025e',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::stride_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a5cff0436eed0fefa2957ad6d083ed007',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::stride_h()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a58e8c883aea4cfdfa5a84c25a4704ebc',1,'cutlass::TileIteratorBase::Params::stride_h()']]], + 
['stride_5fw',['stride_w',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a565f6cab8925d632dcf24bd1974caca2',1,'cutlass::gemm::GemmEpilogueTraits::Params::stride_w()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a313984457c78eea66c980f6813047b9c',1,'cutlass::TileIteratorBase::Params::stride_w()']]], + ['strides',['Strides',['../structcutlass_1_1FragmentIterator.html#a2858ba9a8a9bbaef1de73415cff9b3c1',1,'cutlass::FragmentIterator']]], + ['subview',['subview',['../classcutlass_1_1TensorView.html#aee43c516397d7c06eb8012711d8d7c15',1,'cutlass::TensorView']]], + ['swap',['swap',['../classcutlass_1_1platform_1_1unique__ptr.html#a748d413c50bdbbe9e2f9986fbc423036',1,'cutlass::platform::unique_ptr::swap()'],['../namespacecutlass_1_1platform.html#a3e83320a39137d92042eb0bf93be9678',1,'cutlass::platform::swap()']]], + ['swizzle',['swizzle',['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html#a0a366c072ee66bbcb390acd7b8bbe5f8',1,'cutlass::gemm::IdentityBlockSwizzle']]] +]; diff --git a/docs/generated-html/search/all_12.html b/docs/generated-html/search/all_12.html new file mode 100644 index 00000000..fd265245 --- /dev/null +++ b/docs/generated-html/search/all_12.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_12.js b/docs/generated-html/search/all_12.js new file mode 100644 index 00000000..cd80c9c1 --- /dev/null +++ b/docs/generated-html/search/all_12.js @@ -0,0 +1,54 @@ +var searchData= +[ + ['tensor_5fref_2eh',['tensor_ref.h',['../tensor__ref_8h.html',1,'']]], + ['tensor_5fview_2eh',['tensor_view.h',['../tensor__view_8h.html',1,'']]], + ['tensorref',['TensorRef',['../classcutlass_1_1TensorRef.html',1,'cutlass::TensorRef< Storage_, Rank_ >'],['../classcutlass_1_1TensorRef.html#a54f6edc293b0b8ac97f02e8ab951c478',1,'cutlass::TensorRef::TensorRef()'],['../classcutlass_1_1TensorRef.html#ae48325312183ff61dbd312c64f31fcb8',1,'cutlass::TensorRef::TensorRef(Storage *ptr, Coord< Rank > stride)']]], + ['tensorref_3c_20t_2c_204_20_3e',['TensorRef< T, 4 >',['../classcutlass_1_1TensorRef.html',1,'cutlass']]], + ['tensorref_5ft',['TensorRef_t',['../classcutlass_1_1TensorView.html#a762fc3d887ab14f4c7bcde85f0af16ab',1,'cutlass::TensorView']]], + ['tensorview',['TensorView',['../classcutlass_1_1TensorView.html',1,'cutlass::TensorView< T >'],['../classcutlass_1_1TensorView.html#a22401348796d603546e44d6c196018dc',1,'cutlass::TensorView::TensorView()'],['../classcutlass_1_1TensorView.html#a80480aa986a488a106a9b0aea331c317',1,'cutlass::TensorView::TensorView(TensorRef_t const &_ref, Coord_t const &_size)']]], + 
['this_5f',['This_',['../structcutlass_1_1Fragment.html#a32f7ff86b73576a15c5ddaa40c4e0a95',1,'cutlass::Fragment::This_()'],['../structcutlass_1_1FragmentIterator.html#ae320d9672450f5341abcdb24a8b09369',1,'cutlass::FragmentIterator::This_()'],['../structcutlass_1_1FragmentConstIterator.html#add14f695231c2bdd6284bf22b1e66f8f',1,'cutlass::FragmentConstIterator::This_()'],['../structcutlass_1_1gemm_1_1Gemm.html#a26c13e8bbad805760443ef6df475e317',1,'cutlass::gemm::Gemm::This_()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a2892be253a3de5bffc3edcef2890d3a8',1,'cutlass::gemm::GemmGlobalIteratorAb::This_()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6a745d66c4c7de352041f779e54e6b2b',1,'cutlass::gemm::GemmGlobalIteratorCd::This_()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa8b453116c2d96ea2c56e08cb981346c',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::This_()']]], + ['thread_5fmultiply_5fadd_2eh',['thread_multiply_add.h',['../thread__multiply__add_8h.html',1,'']]], + ['thread_5foffset',['thread_offset',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a1864c5556529afdc8445021cad780b04',1,'cutlass::gemm::GemmGlobalIteratorAb::thread_offset()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a56601dc34e8f9a070db5dc48c37d55a0',1,'cutlass::gemm::GemmGlobalIteratorCd::thread_offset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab3057dad7a4decb5594c66aa328f8066',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::thread_offset()'],['../structcutlass_1_1TileLoadIterator.html#a7726cdd4fe056c59bb04adb9e5504457',1,'cutlass::TileLoadIterator::thread_offset()'],['../structcutlass_1_1TileStoreIterator.html#a350f5beea87d811f43c55519bc0b9035',1,'cutlass::TileStoreIterator::thread_offset()']]], + ['threadblocktile',['ThreadBlockTile',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a5e43f3c9aa8d7dc5f01dfc63b1ea97dc',1,'cutlass::gemm::GemmMultiplicandTraits']]], + 
['threadmultiplyadd',['ThreadMultiplyAdd',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#acec155117a56c942c5e695984b0f072d',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadMultiplyAdd()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a9b75e499f4c14369b5c86051dceeb81d',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadMultiplyAdd()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ab271a3f11ccde4b629ddb11b78c0d555',1,'cutlass::gemm::ThreadMultiplyAdd::ThreadMultiplyAdd()']]], + ['threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20threadsperwarp_5f_2c_20half_2c_20half_2c_20half_20_3e',['ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html',1,'cutlass::gemm']]], + ['threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20threadsperwarp_5f_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e',['ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html',1,'cutlass::gemm']]], + ['threadoffset',['ThreadOffset',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ 
>::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset'],['../structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html',1,'cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, 
kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#afd09d3b8e5ca04eab7edc2e5723816e5',1,'cutlass::gemm::GemmGlobalIteratorAb::ThreadOffset()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6d985f8e93be21e56f72ec1400d73df1',1,'cutlass::gemm::GemmGlobalIteratorCd::ThreadOffset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a667cae4a9fa78a6df073f5ee48ef9664',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::ThreadOffset()'],['../structcutlass_1_1TileTraits.html#af9c0fc178dac7f9dac8d254da34e04dd',1,'cutlass::TileTraits::ThreadOffset()'],['../structcutlass_1_1TileIteratorBase.html#a5abf4755aee07dc58b1d6183fbf4786f',1,'cutlass::TileIteratorBase::ThreadOffset()'],['../structcutlass_1_1TileLoadIterator.html#a8a1527b4b469ae1f97afde2502ece70d',1,'cutlass::TileLoadIterator::ThreadOffset()'],['../structcutlass_1_1TileStoreIterator.html#a6a6f51f459f98c0cddeacf476660cd27',1,'cutlass::TileStoreIterator::ThreadOffset()'],['../structcutlass_1_1TileTraitsStrideMajor.html#ae8d14a3c6871072febfd75ed08aba32c',1,'cutlass::TileTraitsStrideMajor::ThreadOffset()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a823ba83e9ca680da0af7d63be772a351',1,'cutlass::TileTraitsContiguousMajor::ThreadOffset()']]], + ['threads',['Threads',['../structcutlass_1_1gemm_1_1ReshapeThreads.html#afd3614ff45f0fc77ad4967951cb5ab57',1,'cutlass::gemm::ReshapeThreads::Threads()'],['../structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html#a894932ad04fae3aea06eb6d259e01c1c',1,'cutlass::gemm::ReshapeThreads< Tile_, Threads_, true 
>::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a29bd05960cc541bb67098f5483c84cf6',1,'cutlass::gemm::GemmGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a9aff3e2ff0db5a5169257e964e5895c6',1,'cutlass::gemm::GemmGlobalTileCdTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a33e4dcd4449f324fed5ceaa2cde01b50',1,'cutlass::gemm::GemmGlobalIteratorAb::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#afdd08b4f4c1feaa426f997d15cd28c02',1,'cutlass::gemm::GemmGlobalIteratorCd::Threads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a1acf2a1d8bf73fda142e7d82e05f00a2',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a9bef06b59f27c6e673066a7f0280aa06',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Threads()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#ae7a4f120805421ac0712604723612b7e',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a5fd1a9f132c7aa0f68e129553f519d1e',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aeb866237318ac7983e554a08395c5125',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Threads()']]], + 
['threadsdelta',['ThreadsDelta',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a65f9ccd630dde0c9db5358cfc951583d',1,'cutlass::gemm::GemmGlobalTileTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#ae2f8331619e735e620f8a8cf2cdde077',1,'cutlass::gemm::GemmGlobalTileCdTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#a6eee97f03dcea1c441116e143cf58018',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a2bb0f0820e52417ff77e7a2bdb9ed434',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::ThreadsDelta()']]], + ['threadshape',['ThreadShape',['../structcutlass_1_1TileTraitsStrideMajor.html#a03567f41ce616ebb4cdb309c85820599',1,'cutlass::TileTraitsStrideMajor::ThreadShape()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a33116b67e580292d4e354ca17ecd4167',1,'cutlass::TileTraitsContiguousMajor::ThreadShape()'],['../structcutlass_1_1TileTraitsWarpRake.html#ad6619e0b5d876fafd51c78e39f2c029e',1,'cutlass::TileTraitsWarpRake::ThreadShape()']]], + 
['threadsperwarp',['ThreadsPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a0761c497c41a45652368fc0d54def98f',1,'cutlass::gemm::GemmSharedLoadTileATraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#aed92656a074e915d97a1b6a990aeba66',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#adf72ea773b8d4d3eb184f59c8cdf9543',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a9022ffc49b32503fd3639341e7e291a3',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#aa784f29ff453c1656fdea8270454fa55',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a5bc98fd196c1f1e4e3f1bfc621df4f50',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ad2fbba0a70da29af27ed4578577abc5e',1,'cutlass::gemm::ThreadMultiplyAdd::ThreadsPerWarp()']]], + ['threadsstrides',['ThreadsStrides',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ae540e7ea7106552682aa4c97b833b3b1',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ThreadsStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a2053e4b9cb3ed2727c89960354ea0b29',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ThreadsStrides()']]], + 
['tile',['Tile',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aebbf8834d0d88f0e5b3e1926db5e6758',1,'cutlass::gemm::GemmGlobalTileTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ab96f324083e51ce4c2b73c18803c69a7',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a74196946c28e98ee60346b0eeede1471',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a9a00be672617162c4c7ac94c7d8980cc',1,'cutlass::gemm::GemmSharedLoadTileATraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ac242508ec46db0493a69a589dbfc19e4',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a2bc41b907417b47f3dca9c3dd358f8bc',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a63f980fea1ff3dd83ac276cfd83a4ce5',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Tile()'],['../structcutlass_1_1ReshapeTile.html#a8d57fe6422aa920d9815a66e5a85b5f5',1,'cutlass::ReshapeTile::Tile()'],['../structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html#a966a9432cf42dfdff8ad6b89ebd74f06',1,'cutlass::ReshapeTile< Tile_, kAccessSize_, true 
>::Tile()'],['../structcutlass_1_1TileTraits.html#ab831be0adb255eece4f2e12fd9713831',1,'cutlass::TileTraits::Tile()'],['../structcutlass_1_1TileIteratorBase.html#a954ef18acc12d8256a7d4e37683f8c2c',1,'cutlass::TileIteratorBase::Tile()'],['../structcutlass_1_1TileLoadIterator.html#a7f1499ada284c21624487d4d3a5dbd10',1,'cutlass::TileLoadIterator::Tile()'],['../structcutlass_1_1TileStoreIterator.html#a8a87c8ef986e110a01a9226012594a61',1,'cutlass::TileStoreIterator::Tile()'],['../structcutlass_1_1TileTraitsStrideMajor.html#afbb78ece048b868475d4a6802e6894ac',1,'cutlass::TileTraitsStrideMajor::Tile()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a1607d53544302c12278793bc9b283763',1,'cutlass::TileTraitsContiguousMajor::Tile()'],['../structcutlass_1_1TileTraitsWarpRake.html#adcd658d9daf286368a9d51c8c1647f89',1,'cutlass::TileTraitsWarpRake::Tile()'],['../structcutlass_1_1TileTraitsStandard.html#aee3fee526bc4d4820c03665a2f5f166b',1,'cutlass::TileTraitsStandard::Tile()']]], + ['tile_5fiterator_2eh',['tile_iterator.h',['../tile__iterator_8h.html',1,'']]], + ['tile_20load_20iterator_20concept',['Tile Load Iterator Concept',['../group__tile__load__iterator__concept.html',1,'']]], + ['tile_20store_20iterator_20concept',['Tile Store Iterator Concept',['../group__tile__store__iterator__concept.html',1,'']]], + ['tile_20traits_20concept',['Tile Traits Concept',['../group__tile__traits__concept.html',1,'']]], + ['tile_5ftraits_5fstandard_2eh',['tile_traits_standard.h',['../tile__traits__standard_8h.html',1,'']]], + ['tiledthreadoffset',['TiledThreadOffset',['../structcutlass_1_1TiledThreadOffset.html',1,'cutlass']]], + ['tileiteratorbase',['TileIteratorBase',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileiteratorbase_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20advance_5f_2c_20memoryspace_2c_20index_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20iteratorfragment_3a_3akscalar_2c_20shape_3c_200_2c_200_2c_200_2c_200_20_3e_20_3e',['TileIteratorBase< 
TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileiteratorbase_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20iteratoradvance_3a_3akh_2c_20memoryspace_3a_3akglobal_2c_20index_5f_20_3e',['TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileloaditerator',['TileLoadIterator',['../structcutlass_1_1TileLoadIterator.html',1,'cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >'],['../structcutlass_1_1TileLoadIterator.html#a81c9c0b17bf5f214230ecf10e0690a4e',1,'cutlass::TileLoadIterator::TileLoadIterator()'],['../structcutlass_1_1TileLoadIterator.html#a93e166575be3b2f7489833ae5da23f23',1,'cutlass::TileLoadIterator::TileLoadIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())'],['../structcutlass_1_1TileLoadIterator.html#a53282fa4cb33cfcec79033d26e418af6',1,'cutlass::TileLoadIterator::TileLoadIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())']]], + ['tileloaditerator_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20tiletraits_5f_3a_3amultiplicandtraits_3a_3akkstrided_20_3f_20iteratoradvance_3a_3akh_20_3aiteratoradvance_3a_3akw_2c_20memoryspace_3a_3akglobal_2c_20index_5f_20_3e',['TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? 
IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >',['../structcutlass_1_1TileLoadIterator.html',1,'cutlass']]], + ['tilestoreiterator',['TileStoreIterator',['../structcutlass_1_1TileStoreIterator.html',1,'cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >'],['../structcutlass_1_1TileStoreIterator.html#aac4d49854d63f632627b6974f9b59dbb',1,'cutlass::TileStoreIterator::TileStoreIterator()'],['../structcutlass_1_1TileStoreIterator.html#a037ccd942359e6bc8640a240b13cd330',1,'cutlass::TileStoreIterator::TileStoreIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())'],['../structcutlass_1_1TileStoreIterator.html#a4f89c5182659de94605300e15c3651b2',1,'cutlass::TileStoreIterator::TileStoreIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())']]], + ['tiletraits',['TileTraits',['../structcutlass_1_1TileTraits.html',1,'cutlass']]], + ['tiletraitscontiguousmajor',['TileTraitsContiguousMajor',['../structcutlass_1_1TileTraitsContiguousMajor.html',1,'cutlass']]], + ['tiletraitsstandard',['TileTraitsStandard',['../structcutlass_1_1TileTraitsStandard.html',1,'cutlass']]], + ['tiletraitsstridemajor',['TileTraitsStrideMajor',['../structcutlass_1_1TileTraitsStrideMajor.html',1,'cutlass']]], + ['tiletraitswarprake',['TileTraitsWarpRake',['../structcutlass_1_1TileTraitsWarpRake.html',1,'cutlass']]], + 
['tilewithoutskew',['TileWithoutSkew',['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a050cf5964a2d3683491bc4313ead5450',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::TileWithoutSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a5a5a36fc570e1225b20ce0a48c89d213',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithoutSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a1f35981a6d661635dfbcf7c7a76056a2',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithoutSkew()']]], + ['tilewithoutskew_5f',['TileWithoutSkew_',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a93ae99460695718babaef6d1ef597e38',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithoutSkew_()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a3d8be9ddea1cab53d1b4b3d508f9eab8',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithoutSkew_()']]], + ['tilewithskew',['TileWithSkew',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a72e0214f86cf8b3711d006dcd69d7a17',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a69c7ec2a779718556e6d9119588e791c',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithSkew()']]], + 
['traits',['Traits',['../structcutlass_1_1gemm_1_1Gemm.html#a29f52e33e1f1cf150f5062d9ad2590ff',1,'cutlass::gemm::Gemm::Traits()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a645ab6e9e63163ee6bf536717a30fb1b',1,'cutlass::gemm::GemmEpilogue::Traits()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af2b5682b8e6dd13590ec258a44636430',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Traits()'],['../structcutlass_1_1TileIteratorBase.html#ae7add0ee02bbec2c130ebaf608ab0696',1,'cutlass::TileIteratorBase::Traits()'],['../structcutlass_1_1TileLoadIterator.html#a7c6182031d9aa41d0e4a64516723e20a',1,'cutlass::TileLoadIterator::Traits()'],['../structcutlass_1_1TileStoreIterator.html#a6f50a8aec2d7045e9057b93df08172a8',1,'cutlass::TileStoreIterator::Traits()']]], + ['transform',['transform',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a4dd95354137d3cb52752ecdd346a5685',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#aa9fe67c947bf461ba3e3ca48daa34815',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1Copy.html#ab356f0f473aa3fd8df8fb8ddd8e0e9f3',1,'cutlass::Copy::transform(Fragment_ const &src, Fragment_ &dst)'],['../structcutlass_1_1Copy.html#a171f9a44c05b6fb432b0339979de4eb2',1,'cutlass::Copy::transform(InputFragment_ const &src, int offset, Fragment_ 
&dst)'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ad467ce744bf9d478900fb2661d7a1c26',1,'cutlass::gemm::HgemmSwizzle::transform()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a91ad48362b99a5f96ac1e92e95104f7b',1,'cutlass::gemm::IgemmFloatToInt8Converter::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a819fd33db88a68521108bab2641d73fd',1,'cutlass::gemm::IgemmFloatToInt8Converter::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#aca8a61e8eb1ab33b9c61e2e7d342379d',1,'cutlass::gemm::IgemmInt8ToFloatConverter::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a89e078dbf376da872c3993ccbaf744d3',1,'cutlass::gemm::IgemmInt8ToFloatConverter::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a084917a512c7a411b76a69f86b906811',1,'cutlass::gemm::IgemmSwizzle::transform()']]], + ['transformed_5fa',['transformed_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a883b28ca237b1ec076856232cfee0c6f',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['transformed_5fb',['transformed_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a9369a5f819d2a42997491e0df96f47ef',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['transformed_5ffragment',['transformed_fragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afa97cb1cfebca0d6977b1c8318bedddf',1,'cutlass::gemm::GlobalLoadStreamBase']]], + 
['transformedfragment',['TransformedFragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afe7503a3304eefd633581d6bc73a0108',1,'cutlass::gemm::GlobalLoadStreamBase::TransformedFragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#aa2227d7fa1edef3f6730c7db41b132b4',1,'cutlass::gemm::SharedLoadStream::TransformedFragment()']]], + ['transformer',['transformer',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a868f82ee87aba37b05721fe8210221c9',1,'cutlass::gemm::GlobalLoadStreamBase::transformer()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#af846390ad0e5b80ccb4e8b95c5fe64a7',1,'cutlass::gemm::SharedLoadStream::transformer()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#aa24bd9f94bea04a148b49b2a97b63fbe',1,'cutlass::gemm::GlobalLoadStreamBase::Transformer()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#ad1f70f0dd1027da1353ff7a38f524904',1,'cutlass::gemm::SharedLoadStream::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a882c10bed18f62ece97f5f20f9de3296',1,'cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a42c5bafcb226623b3326dbd01fc72f3b',1,'cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#aaaccb3f02a857e0c80d2891c6c6dcdb7',1,'cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#ae66bb2c1f87e19278ff471c32e71ea85',1,'cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ 
>::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html#a98aefa95117dbfdf2e577890318a6c13',1,'cutlass::gemm::IgemmGlobalStoreTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html#a52ecdfd8b94d8d7f4881048e11a33aba',1,'cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html#ad3190650741cef20c1aca919eddd9d72',1,'cutlass::gemm::IgemmGlobalLoadTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html#a49c249026be24ec8a66f5eda99cb855c',1,'cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html#a9edd08d595327a8cc3b8da50622b3bd2',1,'cutlass::gemm::IgemmSharedStoreTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a0b53e18f109ac0fd116e0d01ed6ec197',1,'cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a8a4e3ce1174789e2b695bda7b863079f',1,'cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a92320b7224a77a8af61e55beef30ad49',1,'cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a9728f71c2e7a6a649bd28d8c11241b0a',1,'cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer()']]], + 
['trivialiterator',['TrivialIterator',['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html',1,'cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a6cb3664b5cba4280b7055a65ddad7850',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#ada8cd3ac6db568bb9bf268ba2c3a3e14',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator(Iterator const &it)'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a3adf0440f9a0143a61b43d39c3f03721',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator(PredicateVector const &_vec)']]], + ['trivialpredicatetileadapter',['TrivialPredicateTileAdapter',['../structcutlass_1_1TrivialPredicateTileAdapter.html',1,'cutlass::TrivialPredicateTileAdapter'],['../structcutlass_1_1TrivialPredicateTileAdapter.html#a7259853a129a7e319b972d3b41dd59d7',1,'cutlass::TrivialPredicateTileAdapter::TrivialPredicateTileAdapter()']]], + ['true_5ftype',['true_type',['../namespacecutlass_1_1platform.html#a0eddc4a3921e137f31fd8014be96e807',1,'cutlass::platform']]], + ['type',['Type',['../structcutlass_1_1StorageType.html#a2b9c99ae52eb4962428f776efc1e7f06',1,'cutlass::StorageType::Type()'],['../structcutlass_1_1StorageType_3_014_01_4.html#aa6754c0eb530544a1457afe1ae94a807',1,'cutlass::StorageType< 4 >::Type()'],['../structcutlass_1_1StorageType_3_012_01_4.html#a66c52fe770774ea01c511aea1af1f8d4',1,'cutlass::StorageType< 2 >::Type()'],['../structcutlass_1_1StorageType_3_011_01_4.html#a4a70002785c378c1f180800f2a65bcd4',1,'cutlass::StorageType< 1 >::Type()'],['../structcutlass_1_1Vectorize.html#a070ec95f4297d769ee53a4d8a650c05e',1,'cutlass::Vectorize::Type()'],['../structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html#a79f147933e3f520145aee94ae18da3c5',1,'cutlass::Vectorize< Element_, 1 
>::Type()'],['../structcutlass_1_1platform_1_1integral__constant.html#af58810ccead8f16ed88cd6a4afdc6e52',1,'cutlass::platform::integral_constant::type()'],['../structcutlass_1_1platform_1_1enable__if.html#aff9c0f270020cf097addf77e53a5af99',1,'cutlass::platform::enable_if::type()'],['../structcutlass_1_1platform_1_1conditional.html#ab6484d0dd6449b5195c4e868026fed11',1,'cutlass::platform::conditional::type()'],['../structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html#a8d55f500f667de560650554e9c220644',1,'cutlass::platform::conditional< false, T, F >::type()'],['../structcutlass_1_1platform_1_1remove__const.html#ac3662947fa50251daf58240a9c798085',1,'cutlass::platform::remove_const::type()'],['../structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html#af68706cfaa6af14edc26ad5b974b47e3',1,'cutlass::platform::remove_const< const T >::type()'],['../structcutlass_1_1platform_1_1remove__volatile.html#a4f5b043d46206248d1bbbcf650707dd1',1,'cutlass::platform::remove_volatile::type()'],['../structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html#aca9bb93efe43106321e4afe0b67542a3',1,'cutlass::platform::remove_volatile< volatile T >::type()'],['../structcutlass_1_1platform_1_1remove__cv.html#a19e5b12cf4eb15ce13d6306735b6de08',1,'cutlass::platform::remove_cv::type()'],['../structcutlass_1_1platform_1_1aligned__storage.html#a9cf0360f335bcd1e9d9e1b266b6dd6c1',1,'cutlass::platform::aligned_storage::type()']]] +]; diff --git a/docs/generated-html/search/all_13.html b/docs/generated-html/search/all_13.html new file mode 100644 index 00000000..04f66e2f --- /dev/null +++ b/docs/generated-html/search/all_13.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_13.js b/docs/generated-html/search/all_13.js new file mode 100644 index 00000000..e175495c --- /dev/null +++ b/docs/generated-html/search/all_13.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['unique_5fptr',['unique_ptr',['../classcutlass_1_1platform_1_1unique__ptr.html',1,'cutlass::platform::unique_ptr< T, Deleter >'],['../classcutlass_1_1platform_1_1unique__ptr.html#aa8a370bc7e4c2d99eb85e7fea27b3179',1,'cutlass::platform::unique_ptr::unique_ptr()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a14c8bf5a5deefe4a6602ccd5c5af364c',1,'cutlass::platform::unique_ptr::unique_ptr(pointer p)']]] +]; diff --git a/docs/generated-html/search/all_14.html b/docs/generated-html/search/all_14.html new file mode 100644 index 00000000..285f34bd --- /dev/null +++ b/docs/generated-html/search/all_14.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_14.js b/docs/generated-html/search/all_14.js new file mode 100644 index 00000000..267126dc --- /dev/null +++ b/docs/generated-html/search/all_14.js @@ -0,0 +1,15 @@ +var searchData= +[ + ['val',['val',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html#abc729cc51d5c90b1d7b0df3092d47cd4',1,'cutlass::platform::alignment_of::pad']]], + ['valid',['valid',['../structcutlass_1_1FragmentIterator.html#ab18f8ea676b45831f939715212167a99',1,'cutlass::FragmentIterator::valid()'],['../structcutlass_1_1FragmentConstIterator.html#a01571b2fc566793fd50a10fa82441951',1,'cutlass::FragmentConstIterator::valid()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ac4d2c293f9312b673ea29bf79b2882fd',1,'cutlass::gemm::GemmGlobalIteratorAb::valid()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6594acc213fc8d4289c6c73631f60120',1,'cutlass::gemm::GemmGlobalIteratorCd::valid()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a468f8f503777e4a2b0089ee2bd6c471a',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::valid()'],['../structcutlass_1_1TileIteratorBase.html#af78a2bf3e7507dc7f50343a3c209f770',1,'cutlass::TileIteratorBase::valid()']]], + ['value',['value',['../structcutlass_1_1log2__down.html#a793565cd891559fab765455e847171dca23d1b50f2f02e1026d4b5dc7ebd6880d',1,'cutlass::log2_down::value()'],['../structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html#ad7d3c2329ab708bd4af36ffaee8509cba282c4c5d8f66dc49544f34071f148b1f',1,'cutlass::log2_down< N, 1, Count >::value()'],['../structcutlass_1_1log2__up.html#a5826002505544547d0c5cc311c2338e3a09591054a7c9b184769d579c56dd09d6',1,'cutlass::log2_up::value()'],['../structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html#ab001737f02df0a2c514334a1bfa6f1f9a6b6af5b6bf14ee5d3e3f1442e7f75117',1,'cutlass::log2_up< N, 1, Count 
>::value()'],['../structcutlass_1_1sqrt__est.html#abe44577e3d8f34fc07bb9ecf89b25b11a2e73d046302be2504f50c08d788e9964',1,'cutlass::sqrt_est::value()'],['../structcutlass_1_1divide__assert.html#a20e8b8a803c6b5cfe636724760442e33ab924a64662c2eb917b1dd4ca31fdd2dc',1,'cutlass::divide_assert::value()'],['../structcutlass_1_1platform_1_1integral__constant.html#a9bbaca83ae76941edb9b75b2741d3ad9',1,'cutlass::platform::integral_constant::value()'],['../structcutlass_1_1platform_1_1is__base__of__helper.html#ac7e3ab73057682cc2eb6ed74c33e5eff',1,'cutlass::platform::is_base_of_helper::value()'],['../structcutlass_1_1platform_1_1alignment__of.html#aa1d40937d3536b68e90c580765821389aa36284864bc3d1f73d3bf73cd8da7c83',1,'cutlass::platform::alignment_of::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html#a6005c446eb41749276e0114b82abd990a5b0129d0f9bb45f1c56506efbbb22b6f',1,'cutlass::platform::alignment_of< int4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html#ac55e0c5a0bc4c95981744e55ee7580cea807729922944eede573430b20ad4b322',1,'cutlass::platform::alignment_of< uint4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html#ac9e709c32271b14b35c9607c64835a95a6a6ee3f24f4d123fc7c138fe5b776f2e',1,'cutlass::platform::alignment_of< float4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html#ad58512f76f0b9b000d48f1ff869a0547a3d020dd8ba5c735a60d7c2c897e158f5',1,'cutlass::platform::alignment_of< long4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html#adc0eec628649de183fe984bb46898830a8152a79c27d055dc3d0b8d662c0bc96a',1,'cutlass::platform::alignment_of< ulong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html#aadf6522691db02f1aab22c22716f0793a940fa73dc4f0a49b78e4e0cefaf4775d',1,'cutlass::platform::alignment_of< longlong2 
>::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html#a511f088278b3de04feb55ab60bdc5a09a58b5cc7be52956c43c2966af5887db80',1,'cutlass::platform::alignment_of< ulonglong2 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html#a5fb114d264023728cca5364401bd6929a7b89d57c8009e094f69ff57e196d8318',1,'cutlass::platform::alignment_of< double2 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html#a666c4fd30155873e3499f5cdc11782daafc1a7c2bb5e6483d42d380a2b4fd9561',1,'cutlass::platform::alignment_of< longlong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html#a2568c1ab218cab6505bd20e3c2c420ffa54f6e1afec0ed30b18ab79fd6faf81b5',1,'cutlass::platform::alignment_of< ulonglong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html#a024eaf40a8f3e8bd38b416868e0c68bca5a60b16666306472e92ad1320473ba85',1,'cutlass::platform::alignment_of< double4 >::value()']]], + ['value_5ftype',['value_type',['../structcutlass_1_1platform_1_1integral__constant.html#ab2ed0b3506818139f1f96639742e79fd',1,'cutlass::platform::integral_constant']]], + ['vector',['Vector',['../unioncutlass_1_1Vector.html',1,'cutlass::Vector< Scalar_, kLanes_ >'],['../structcutlass_1_1VectorTraits.html#a4ac6196c07e0d3ba8a03cd72a05026a2',1,'cutlass::VectorTraits::Vector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#a12b9084c48d2d829730f907485dfb5e5',1,'cutlass::VectorTraits< Vector< T, Lanes > >::Vector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#aff21f15596731eacf8c587811bb4ccdb',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::Vector()']]], + ['vector_2eh',['vector.h',['../vector_8h.html',1,'']]], + ['vector_3c_20half_2c_20klanes_5f_20_3e',['Vector< half, kLanes_ >',['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html',1,'cutlass']]], + 
['vectorize',['Vectorize',['../structcutlass_1_1Vectorize.html',1,'cutlass']]], + ['vectorize_3c_20element_5f_2c_201_20_3e',['Vectorize< Element_, 1 >',['../structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html',1,'cutlass']]], + ['vectortraits',['VectorTraits',['../structcutlass_1_1VectorTraits.html',1,'cutlass']]], + ['vectortraits_3c_20vector_3c_20t_2c_20lanes_20_3e_20_3e',['VectorTraits< Vector< T, Lanes > >',['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html',1,'cutlass']]], + ['vectortraits_3c_20vector_3c_20t_2c_20lanes_20_3e_20const_20_3e',['VectorTraits< Vector< T, Lanes > const >',['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/all_15.html b/docs/generated-html/search/all_15.html new file mode 100644 index 00000000..0ed74e01 --- /dev/null +++ b/docs/generated-html/search/all_15.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_15.js b/docs/generated-html/search/all_15.js new file mode 100644 index 00000000..ddd79cab --- /dev/null +++ b/docs/generated-html/search/all_15.js @@ -0,0 +1,12 @@ +var searchData= +[ + ['warps',['Warps',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#aaff4a5e0f9e4256f184a22cad0ce8cf4',1,'cutlass::gemm::GemmSharedLoadTileATraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a7ad7a4e33ed43926e165e66162eb620b',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#af4597927405d8bb1ad2c464fad064703',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a4764f70691cb3fee91ce47653363aa4f',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#abb6ba58a2f2d80db0b2c9c1d88454efd',1,'cutlass::gemm::GemmConfig::Warps()']]], + ['wmma_5fgemm_5fepilogue_5ftraits_2eh',['wmma_gemm_epilogue_traits.h',['../wmma__gemm__epilogue__traits_8h.html',1,'']]], + ['wmma_5fgemm_5fglobal_5ftile_2eh',['wmma_gemm_global_tile.h',['../wmma__gemm__global__tile_8h.html',1,'']]], + ['wmma_5fgemm_5fmultiply_5fadd_2eh',['wmma_gemm_multiply_add.h',['../wmma__gemm__multiply__add_8h.html',1,'']]], + ['wmma_5fgemm_5fshared_5ftile_2eh',['wmma_gemm_shared_tile.h',['../wmma__gemm__shared__tile_8h.html',1,'']]], + ['wmma_5fgemm_5ftraits_2eh',['wmma_gemm_traits.h',['../wmma__gemm__traits_8h.html',1,'']]], + ['wmma_5fmatrix_2eh',['wmma_matrix.h',['../wmma__matrix_8h.html',1,'']]], + ['wmmagemmglobaliteratorcd',['WmmaGemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ 
>'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a505f124fa3f47c6d57b7275e81be6dd3',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::WmmaGemmGlobalIteratorCd()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa5c14e2a799249fe8bba14aa1dbe69dc',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::WmmaGemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int const pointer_offset=0, int const pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())']]], + ['wmmagemmglobaliteratorcdtraits',['WmmaGemmGlobalIteratorCdTraits',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/all_16.html b/docs/generated-html/search/all_16.html new file mode 100644 index 00000000..696f0252 --- /dev/null +++ b/docs/generated-html/search/all_16.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_16.js b/docs/generated-html/search/all_16.js new file mode 100644 index 00000000..d8526488 --- /dev/null +++ b/docs/generated-html/search/all_16.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['yes',['yes',['../structcutlass_1_1platform_1_1is__base__of__helper.html#ac1cf3f804e7686213fd42c678cc6d669',1,'cutlass::platform::is_base_of_helper']]] +]; diff --git a/docs/generated-html/search/all_17.html b/docs/generated-html/search/all_17.html new file mode 100644 index 00000000..f1e14b63 --- /dev/null +++ b/docs/generated-html/search/all_17.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_17.js b/docs/generated-html/search/all_17.js new file mode 100644 index 00000000..10f55890 --- /dev/null +++ b/docs/generated-html/search/all_17.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['_7eunique_5fptr',['~unique_ptr',['../classcutlass_1_1platform_1_1unique__ptr.html#a8902399dac4ab64f08f909f2ad9d4bcf',1,'cutlass::platform::unique_ptr']]] +]; diff --git a/docs/generated-html/search/all_2.html b/docs/generated-html/search/all_2.html new file mode 100644 index 00000000..2f17735e --- /dev/null +++ b/docs/generated-html/search/all_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_2.js b/docs/generated-html/search/all_2.js new file mode 100644 index 00000000..b440de04 --- /dev/null +++ b/docs/generated-html/search/all_2.js @@ -0,0 +1,10 @@ +var searchData= +[ + ['base',['Base',['../structcutlass_1_1gemm_1_1GlobalLoadStream.html#a507f825824e624d80a34ea9395934160',1,'cutlass::gemm::GlobalLoadStream::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a581b7cdeef3e620f246923fa07f9db5a',1,'cutlass::gemm::GemmGlobalTileCdTraits::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ae13e0d30a941e16875f196b4844b03ed',1,'cutlass::gemm::GemmGlobalIteratorAb::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a8f8fbb65070589769468c6b1ac6ba7a5',1,'cutlass::gemm::GemmGlobalIteratorCd::Base()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#ac0c372c24c4c5340153b11edab874741',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Base()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a7ec19bf90207a7f598f2ec5166649495',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#aca63ec1099444c555299dc144282dded',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a4b23ba8c14e26672a516aa43063250c2',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue.html#a07f9a934f04610db41aa1aac2f4cdf04',1,'cutlass::gemm::IgemmEpilogue::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html#a98b415dbe6f7b6cb0c41a4e6b3ad5abf',1,'cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true 
>::Base()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#ab19f72d239f639f261fbb63f72f10acf',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Base()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#affd04d88a0bbef13c54f10000a5dc15d',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aef7047c6a0d0c3db0bfb6bec08520aad',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html#a194aa2762885c3d556a84ff410200b86',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::Base()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a48a8eda430139e6a131654a54bbf0f3b',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Base()'],['../classcutlass_1_1TensorView.html#a27f09c55f879410cceb75eb25fe542d4',1,'cutlass::TensorView::Base()'],['../structcutlass_1_1TileLoadIterator.html#a1bc1bd4893c14b313ee71b71db2903f3',1,'cutlass::TileLoadIterator::Base()'],['../structcutlass_1_1TileStoreIterator.html#af4576dca736bab8ac73b308522cb4a67',1,'cutlass::TileStoreIterator::Base()']]], + ['baseparams',['BaseParams',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a09268125f1e323874f6c12b50185c517',1,'cutlass::gemm::GemmGlobalIteratorAb::BaseParams()'],['../structcutlass_1_1TileLoadIterator.html#a788bab4fa46dc26854348b751cf1cc76',1,'cutlass::TileLoadIterator::BaseParams()'],['../structcutlass_1_1TileStoreIterator.html#a5484b46ac2646edb7a185b51137f70c0',1,'cutlass::TileStoreIterator::BaseParams()']]], + ['begin',['begin',['../structcutlass_1_1PredicateVector.html#a649045d8224514a4c28bcaf4b247b4a5',1,'cutlass::PredicateVector']]], + 
['beta',['beta',['../structcutlass_1_1gemm_1_1GemmDesc.html#ab91b702a9932144b388fad3159130332',1,'cutlass::gemm::GemmDesc::beta()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a0e455ad2e4eba67259867f9123ca817b',1,'cutlass::gemm::LinearScaling::Params::beta()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#a8af4e58c4988838f2dd0a2172c47e12e',1,'cutlass::gemm::LinearScaling::beta()']]], + ['blockswizzle',['BlockSwizzle',['../structcutlass_1_1gemm_1_1GemmTraits.html#a50672b5fa67d858aeff8f254cf28e941',1,'cutlass::gemm::GemmTraits']]], + ['bool_5fconstant',['bool_constant',['../structcutlass_1_1platform_1_1bool__constant.html',1,'cutlass::platform']]], + ['byte',['byte',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html#a86f075f91b80918e968951713430f0b4',1,'cutlass::platform::alignment_of::pad']]] +]; diff --git a/docs/generated-html/search/all_3.html b/docs/generated-html/search/all_3.html new file mode 100644 index 00000000..a3e6f7db --- /dev/null +++ b/docs/generated-html/search/all_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_3.js b/docs/generated-html/search/all_3.js new file mode 100644 index 00000000..fa720a8d --- /dev/null +++ b/docs/generated-html/search/all_3.js @@ -0,0 +1,59 @@ +var searchData= +[ + ['check',['check',['../structcutlass_1_1platform_1_1is__base__of__helper.html#a5bf08859497e304ca353699ad6ac332b',1,'cutlass::platform::is_base_of_helper::check(DerivedT *, T)'],['../structcutlass_1_1platform_1_1is__base__of__helper.html#ae8896817cabf297437b3a073e693ffd2',1,'cutlass::platform::is_base_of_helper::check(BaseT *, int)']]], + ['clamp',['clamp',['../structcutlass_1_1Coord.html#a482ada6da62f427987c22098796fcf7e',1,'cutlass::Coord']]], + ['clear',['clear',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a5513254af1f9979b6d0b9f236c3e7325',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage::clear()'],['../structcutlass_1_1Fragment.html#a29e7408fcde8cdf9de5e3a10eaa46391',1,'cutlass::Fragment::clear()'],['../structcutlass_1_1gemm_1_1ClearAccumulators.html#adb8026a19b09e9a581ec767c2c2da4ab',1,'cutlass::gemm::ClearAccumulators::clear()']]], + ['clear_5faccumulators_2eh',['clear_accumulators.h',['../clear__accumulators_8h.html',1,'']]], + ['clearaccumulators',['ClearAccumulators',['../structcutlass_1_1gemm_1_1ClearAccumulators.html',1,'cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >'],['../structcutlass_1_1gemm_1_1GemmTraits.html#ae1cf7988c9cff79a2c3252aaf91fc165',1,'cutlass::gemm::GemmTraits::ClearAccumulators()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#aba2366bec386c74df47dfd0426b07041',1,'cutlass::gemm::HgemmTraitsHelper::ClearAccumulators()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a5645e18de29a84c9a9b3f3105966f0c5',1,'cutlass::gemm::IgemmTraitsHelper::ClearAccumulators()'],['../structcutlass_1_1gemm_1_1ClearAccumulators.html#a4ba07ea6d6fef961de1cb95b13c672ef',1,'cutlass::gemm::ClearAccumulators::ClearAccumulators()']]], + 
['commit',['commit',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a6ce2c6e81d159d8e9ab736cb263f44ae',1,'cutlass::gemm::GlobalLoadStreamBase::commit()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a9cc435369c7fc76d0bb6233a8258e257',1,'cutlass::gemm::SharedLoadStream::commit()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a6dc512be014b9d849057e2fd4c0b0485',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::commit()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#ade2d85507dec77591e66276339a1eef5',1,'cutlass::gemm::GemmTraits::SharedLoadStream::commit()']]], + ['computeoffsetfromshape',['ComputeOffsetFromShape',['../structcutlass_1_1ComputeOffsetFromShape.html',1,'cutlass']]], + ['computeoffsetfromshape_3c_20shape_3c_201_2c_20ksh_5f_2c_20ksw_5f_2c_201_20_3e_20_3e',['ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >',['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromshape_3c_20shape_3c_201_2c_20ksh_5f_2c_20ksw_5f_2c_20ksc_5f_20_3e_20_3e',['ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >',['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromstrides',['ComputeOffsetFromStrides',['../structcutlass_1_1ComputeOffsetFromStrides.html',1,'cutlass']]], + ['computeoffsetfromstrides_3c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_201_20_3e_20_3e',['ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >',['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromstrides_3c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_20s_5fc_5f_20_3e_20_3e',['ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >',['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html',1,'cutlass']]], + 
['computethreadoffsetfromstrides',['ComputeThreadOffsetFromStrides',['../structcutlass_1_1ComputeThreadOffsetFromStrides.html',1,'cutlass']]], + ['computethreadoffsetfromstrides_3c_20shape_3c_201_2c_20t_5fh_5f_2c_20t_5fw_5f_2c_201_20_3e_2c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_201_20_3e_20_3e',['ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >',['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html',1,'cutlass']]], + ['computethreadoffsetfromstrides_3c_20shape_3c_201_2c_20t_5fh_5f_2c_20t_5fw_5f_2c_20t_5fc_5f_20_3e_2c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_20s_5fc_5f_20_3e_20_3e',['ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >',['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html',1,'cutlass']]], + ['conditional',['conditional',['../structcutlass_1_1platform_1_1conditional.html',1,'cutlass::platform']]], + ['conditional_3c_20false_2c_20t_2c_20f_20_3e',['conditional< false, T, F >',['../structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html',1,'cutlass::platform']]], + ['congruous',['Congruous',['../structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html#abe4eb7f9a0ed7d48a81029e88849dcf2',1,'cutlass::gemm::GemmOperandTraitsAb']]], + ['const_5fbegin',['const_begin',['../structcutlass_1_1PredicateVector.html#aeb7f9226a4fa49d06500c3c83958dc41',1,'cutlass::PredicateVector']]], + ['const_5fend',['const_end',['../structcutlass_1_1PredicateVector.html#ab931610bc07ee0e87bb4d9a4d53a2321',1,'cutlass::PredicateVector']]], + ['const_5fref',['const_ref',['../classcutlass_1_1TensorView.html#a23564f1d333bb16343ed3a885f894285',1,'cutlass::TensorView']]], + ['constexpr',['constexpr',['../platform_8h.html#a72f0657181cca64b44eb186b707eb380',1,'platform.h']]], + 
['constiterator',['ConstIterator',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html',1,'cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a1216aab9c567ec0d4232019008ef3ea7',1,'cutlass::PredicateVector::ConstIterator::ConstIterator(ConstIterator const &it)'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a590e4f4533c87162c0b79e8d876a8fda',1,'cutlass::PredicateVector::ConstIterator::ConstIterator(PredicateVector const &_vec, int _start=0)']]], + ['constpredicatetileadapter',['ConstPredicateTileAdapter',['../structcutlass_1_1ConstPredicateTileAdapter.html',1,'cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >'],['../structcutlass_1_1ConstPredicateTileAdapter.html#a9abd78d5c3e444bfb23d2b1a08be2be1',1,'cutlass::ConstPredicateTileAdapter::ConstPredicateTileAdapter()']]], + ['consttensorref_5ft',['ConstTensorRef_t',['../classcutlass_1_1TensorView.html#a8ef76170bc5ba832dc01339133021830',1,'cutlass::TensorView']]], + ['contains',['contains',['../classcutlass_1_1TensorView.html#aa94063d9a9c6e599d3f53e22433274be',1,'cutlass::TensorView']]], + ['convert',['Convert',['../structcutlass_1_1Convert.html',1,'cutlass::Convert< InputFragment_, OutputFragment_ >'],['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a593a5a2c48708965e829d242ccb3b99f',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::Convert()'],['../classcutlass_1_1TensorRef.html#a7eb4444e2b3fce5a5ccde65a75df633c',1,'cutlass::TensorRef::convert()']]], + ['convert_2eh',['convert.h',['../convert_8h.html',1,'']]], + ['convert_3c_20fragment_3c_20inputscalar_5f_2c_20kscalars_5f_20_3e_2c_20fragment_3c_20outputscalar_5f_2c_20kscalars_5f_20_3e_20_3e',['Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > 
>',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html',1,'cutlass']]], + ['coord',['Coord',['../structcutlass_1_1Coord.html',1,'cutlass::Coord< N_ >'],['../structcutlass_1_1Coord.html#a9cbfff91f0b0d0a149534c97e3d6e69b',1,'cutlass::Coord::Coord(int value=0)'],['../structcutlass_1_1Coord.html#a53a3d88a884f6cb7fda8aedfe2cec2c5',1,'cutlass::Coord::Coord(int _idx[])']]], + ['coord_2eh',['coord.h',['../coord_8h.html',1,'']]], + ['coord_3c_204_20_3e',['Coord< 4 >',['../structcutlass_1_1Coord.html',1,'cutlass']]], + ['coord_3c_20rank_20_3e',['Coord< Rank >',['../structcutlass_1_1Coord.html',1,'cutlass']]], + ['coord_5ft',['Coord_t',['../classcutlass_1_1TensorView.html#a4037baf5069138ec3967810d2e185017',1,'cutlass::TensorView']]], + ['copy',['Copy',['../structcutlass_1_1Copy.html',1,'cutlass::Copy< Fragment_ >'],['../structcutlass_1_1Copy.html#ab2c20f886208396a1779c6d29b56c3f1',1,'cutlass::Copy::Copy()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#af7a15b4456cda01c1ffbb2fdc532e87e',1,'cutlass::gemm::GlobalLoadStreamBase::copy()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a7f6bf3b8d70bcd74d84519decd9f0d8e',1,'cutlass::gemm::SharedLoadStream::copy(FetchedFragment &fetched)'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a279144e9722055d4b862e3fa25948762',1,'cutlass::gemm::SharedLoadStream::copy(int d, FetchedFragment &fetched)'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#ae033f55779b45b4228f40a4d699062bb',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::copy()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#af25495bb0bb35bd64246d3a80fe4806f',1,'cutlass::gemm::GemmTraits::SharedLoadStream::copy()']]], + ['core_5fio_2eh',['core_io.h',['../core__io_8h.html',1,'']]], + ['count',['count',['../structcutlass_1_1Coord.html#a40429a9154f7a142ad7e9eb35282d196',1,'cutlass::Coord']]], + 
['cuda_5flog',['CUDA_LOG',['../debug_8h.html#a27e3466bcf1ec7fda4f6f95aa0a51177',1,'debug.h']]], + ['cuda_5flog_5fdebug',['CUDA_LOG_DEBUG',['../debug_8h.html#a8d6986db819719ada8b29d53dfc104a6',1,'debug.h']]], + ['cuda_5fperror',['CUDA_PERROR',['../debug_8h.html#aed8337b88d71895f95f8980ef0b3a50b',1,'debug.h']]], + ['cuda_5fperror_5fdebug',['CUDA_PERROR_DEBUG',['../debug_8h.html#a36436f5408940a47ac5cdfc9b31648db',1,'debug.h']]], + ['cuda_5fperror_5fexit',['CUDA_PERROR_EXIT',['../debug_8h.html#a002632ff687c83cff0484476be401f05',1,'debug.h']]], + ['cuda_5fperror_5fimpl',['cuda_perror_impl',['../namespacecutlass.html#a6d3dfeb642a2ce3d5f52243fe48f89cc',1,'cutlass']]], + ['cutlass',['cutlass',['../namespacecutlass.html',1,'']]], + ['cutlass_2eh',['cutlass.h',['../cutlass_8h.html',1,'']]], + ['cutlass_5fassert',['CUTLASS_ASSERT',['../cutlass_8h.html#a0159b8e4cd578881a1ccfd0921516af7',1,'cutlass.h']]], + ['cutlass_5fhost_5fdevice',['CUTLASS_HOST_DEVICE',['../cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1',1,'cutlass.h']]], + ['cutlass_5fmajor',['CUTLASS_MAJOR',['../cutlass_8h.html#a8ff3cda9323810c1c504793a0206d4b8',1,'cutlass.h']]], + ['cutlass_5fmath_2eh',['cutlass_math.h',['../cutlass__math_8h.html',1,'']]], + ['cutlass_5fminor',['CUTLASS_MINOR',['../cutlass_8h.html#ad114a1ab01f73833ea00020ffb7bcea7',1,'cutlass.h']]], + ['cutlass_5fpatch',['CUTLASS_PATCH',['../cutlass_8h.html#a1d4e5818a594bbfc472e54978955cb8b',1,'cutlass.h']]], + ['cutlass_5fpragma_5fno_5funroll',['CUTLASS_PRAGMA_NO_UNROLL',['../cutlass_8h.html#adb3bc73d74b4a4bf13099d5696db3352',1,'cutlass.h']]], + ['cutlass_5fpragma_5funroll',['CUTLASS_PRAGMA_UNROLL',['../cutlass_8h.html#a4b1c9f25ab6eaa25e1f2258dd63e6ce4',1,'cutlass.h']]], + ['cutlass_5fversion',['CUTLASS_VERSION',['../cutlass_8h.html#aa3040eddf073214969f9445bfa925039',1,'cutlass.h']]], + ['gemm',['gemm',['../namespacecutlass_1_1gemm.html',1,'cutlass']]], + ['platform',['platform',['../namespacecutlass_1_1platform.html',1,'cutlass']]] +]; diff --git 
a/docs/generated-html/search/all_4.html b/docs/generated-html/search/all_4.html new file mode 100644 index 00000000..6452295d --- /dev/null +++ b/docs/generated-html/search/all_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_4.js b/docs/generated-html/search/all_4.js new file mode 100644 index 00000000..c58b1aa4 --- /dev/null +++ b/docs/generated-html/search/all_4.js @@ -0,0 +1,19 @@ +var searchData= +[ + ['d_5fa',['d_a',['../structcutlass_1_1gemm_1_1GemmDesc.html#aae63781de41962f496da469684919447',1,'cutlass::gemm::GemmDesc']]], + ['d_5fb',['d_b',['../structcutlass_1_1gemm_1_1GemmDesc.html#a05915032eba39bc9b085bec5ff17257b',1,'cutlass::gemm::GemmDesc']]], + ['d_5fc',['d_c',['../structcutlass_1_1gemm_1_1GemmDesc.html#aa2b3126c082d04fd31521cb0e84cf4d5',1,'cutlass::gemm::GemmDesc']]], + ['d_5fd',['d_d',['../structcutlass_1_1gemm_1_1GemmDesc.html#a30326e2d81c8e154d749f35837903216',1,'cutlass::gemm::GemmDesc']]], + ['data',['data',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a3af66b82b1a0cc5bf6141f940553e048',1,'cutlass::gemm::GemmGlobalIteratorAb::data()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a0d3c1a58f23957f9850d1b22992a981a',1,'cutlass::gemm::GemmGlobalIteratorCd::data()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6fd4e62eb280a5b8c17eb79141414581',1,'cutlass::gemm::GemmGlobalIteratorCd::data() const'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#afe77778a126449e210c0bd6ec2dc6709',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::data()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a90e9886534ecbbce69f57b4030d0903f',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::data() const'],['../classcutlass_1_1TensorRef.html#a8e23c78658f45c6f197a1774cc85c5b7',1,'cutlass::TensorRef::data()'],['../classcutlass_1_1TensorView.html#a248e4240ccf96c976254464710a73fc8',1,'cutlass::TensorView::data()'],['../structcutlass_1_1TileLoadIterator.html#afb6320b600f1f561594a9fb543b954e4',1,'cutlass::TileLoadIterator::data()'],['../structcutlass_1_1TileStoreIterator.html#a5ebab59862d5f50ad980871515d999b0',1,'cutlass::TileStoreIterator::data()']]], + ['debug_2eh',['debug.h',['../debug_8h.html',1,'']]], 
+ ['default_5fdelete',['default_delete',['../structcutlass_1_1platform_1_1default__delete.html',1,'cutlass::platform']]], + ['default_5fdelete_3c_20t_5b_5d_3e',['default_delete< T[]>',['../structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html',1,'cutlass::platform']]], + ['deleter_5ftype',['deleter_type',['../classcutlass_1_1platform_1_1unique__ptr.html#a85cab9945c36dc56bd7d6adf30c0d252',1,'cutlass::platform::unique_ptr']]], + ['delta',['Delta',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#af1f105d4712f01880b0944666e2f81ae',1,'cutlass::gemm::GemmEpilogueTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aede069e51e0732a9648c437261bd4d66',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Delta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a07bb48f99000256f04f00564a4371c2f',1,'cutlass::gemm::GemmGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#aba61fb6e93a6423ab72c082c280f5db4',1,'cutlass::gemm::GemmGlobalTileCdTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a645f65f7d8f123936b286521df470224',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#afd691b764b7d105a1ed41dada6049e71',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a2ee87510d2deccf8b9633aaa4f6340ea',1,'cutlass::gemm::GemmSharedLoadTileATraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ad029d098ba13543bf99c728e6b93006d',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a5587ef22f419ab9a7c6117917cc99c57',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#ac5578da2577cddd5a38cb628f894f644',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Delta()'],['../structcutlass_1_1gemm_1_1HgemmCrossw
iseGlobalTileTraits.html#a8f8de5a6811b77f0c721cd78a237223e',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#aed055504ec5f09657e059416150188a9',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Delta()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a08dada072eefded4c859df4e5fc25ca6',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html#ab55665f7c2f2cb8b8b9b8ac852d48002',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::Delta()'],['../structcutlass_1_1TileTraits.html#af88f5cea9f452d83004ea0fa0f9d56eb',1,'cutlass::TileTraits::Delta()'],['../structcutlass_1_1TileIteratorBase.html#a9bc6c04f4a3adeb5a29743fa43425088',1,'cutlass::TileIteratorBase::Delta()'],['../structcutlass_1_1TileLoadIterator.html#ac2a7f94723259f0d3c7b8a6d5b8778bf',1,'cutlass::TileLoadIterator::Delta()'],['../structcutlass_1_1TileStoreIterator.html#a1c433ba0eea5e6a46f36101d8de98ed0',1,'cutlass::TileStoreIterator::Delta()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a47404b4527b101e286347714aea687d5',1,'cutlass::TileTraitsStrideMajor::Delta()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#ab1a4945bf562debeee1af813288e5896',1,'cutlass::TileTraitsContiguousMajor::Delta()'],['../structcutlass_1_1TileTraitsWarpRake.html#a3ce218b223c5716af40c316899324bbe',1,'cutlass::TileTraitsWarpRake::Delta()']]], + ['dgemm_5ftraits_2eh',['dgemm_traits.h',['../dgemm__traits_8h.html',1,'']]], + ['dgemmconfig',['DgemmConfig',['../structcutlass_1_1gemm_1_1DgemmConfig.html',1,'cutlass::gemm']]], + ['dgemmtraits',['DgemmTraits',['../structcutlass_1_1gemm_1_1DgemmTraits.html',1,'cutlass::gemm']]], + ['divide_5fassert',['divide_assert',['../structcutlass_1_1divide__assert.html',1,'cutlass']]], + ['dot',['dot',['../structcutlass_1_1Coord.html#ad4b3704d14057c043f972827671115cf',1,'cutlass::Coord::dot(Coord const &b, T sum) 
const'],['../structcutlass_1_1Coord.html#ae023c0c664c22a978e9b9ce5e063aae4',1,'cutlass::Coord::dot(Coord const &b) const']]], + ['dummy',['dummy',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html',1,'cutlass::platform::is_base_of_helper']]] +]; diff --git a/docs/generated-html/search/all_5.html b/docs/generated-html/search/all_5.html new file mode 100644 index 00000000..e59e1d53 --- /dev/null +++ b/docs/generated-html/search/all_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_5.js b/docs/generated-html/search/all_5.js new file mode 100644 index 00000000..066d4cd3 --- /dev/null +++ b/docs/generated-html/search/all_5.js @@ -0,0 +1,14 @@ +var searchData= +[ + ['element',['Element',['../structcutlass_1_1Fragment.html#a9c67fa5bbd0b8b49bd6ec002dee3cbab',1,'cutlass::Fragment::Element()'],['../structcutlass_1_1FragmentIterator.html#ab4ef3c5a6b5e13224e45bbbcb9f1bc5d',1,'cutlass::FragmentIterator::Element()'],['../structcutlass_1_1FragmentConstIterator.html#ae98ab2a88342e7dbf9631cfb5cf5e706',1,'cutlass::FragmentConstIterator::Element()']]], + ['element_5ftype',['element_type',['../classcutlass_1_1platform_1_1unique__ptr.html#a94cea0ebf2ac4bec69dfa1f80ea07d50',1,'cutlass::platform::unique_ptr']]], + ['enable_5fif',['enable_if',['../structcutlass_1_1platform_1_1enable__if.html',1,'cutlass::platform']]], + ['enable_5fif_3c_20false_2c_20t_20_3e',['enable_if< false, T >',['../structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html',1,'cutlass::platform']]], + ['end',['end',['../structcutlass_1_1PredicateVector.html#ad9493fc80fdc33330cc15641779cc275',1,'cutlass::PredicateVector']]], + 
['epilogue',['Epilogue',['../structcutlass_1_1gemm_1_1GemmTraits.html#a424f1ac14e1e7ad37428edd0cf13e7fe',1,'cutlass::gemm::GemmTraits::Epilogue()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a234ae6065d5ab56135e10119d3ad2d98',1,'cutlass::gemm::HgemmTraitsHelper::Epilogue()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a5e2ed697a9091a1ca8b19855b5a2c651',1,'cutlass::gemm::IgemmTraitsHelper::Epilogue()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a073430a1e8b124aec8a1f1e00f262bc8',1,'cutlass::gemm::GemmTraits::Params::epilogue()'],['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html#afdca9ac1d28e17efaa394f5831a60c04',1,'cutlass::gemm::GemmTraits::SharedStorage::epilogue()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ae1983e37454ed14272b23b964614c54c',1,'cutlass::gemm::GemmEpilogue::epilogue()']]], + ['epilogue_5fwith_5for_5fwithout_5fbeta',['epilogue_with_or_without_beta',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a0c24dce365565f75e7edc1de1cb50ea4',1,'cutlass::gemm::GemmEpilogue']]], + ['evaluate',['evaluate',['../structcutlass_1_1gemm_1_1LinearScaling.html#a2e0d140aed388d2457dfb24d28fcd08a',1,'cutlass::gemm::LinearScaling::evaluate(Fragment_ const &accum, Fragment_ &output)'],['../structcutlass_1_1gemm_1_1LinearScaling.html#a47a53e5b67b2207fb3ba38a8b9cef448',1,'cutlass::gemm::LinearScaling::evaluate(Fragment_ const &accum, Fragment_ const &old, Fragment_ &output)']]], + ['extent',['Extent',['../structcutlass_1_1Extent.html',1,'cutlass']]], + ['extent_3c_20vector_3c_20t_2c_20lanes_20_3e_20_3e',['Extent< Vector< T, Lanes > >',['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html',1,'cutlass']]], + ['extent_3c_20vector_3c_20t_2c_20lanes_20_3e_20const_20_3e',['Extent< Vector< T, Lanes > const >',['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/all_6.html b/docs/generated-html/search/all_6.html new file mode 100644 
index 00000000..f75a754e --- /dev/null +++ b/docs/generated-html/search/all_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_6.js b/docs/generated-html/search/all_6.js new file mode 100644 index 00000000..0734def4 --- /dev/null +++ b/docs/generated-html/search/all_6.js @@ -0,0 +1,32 @@ +var searchData= +[ + ['false_5ftype',['false_type',['../namespacecutlass_1_1platform.html#ad8c95b2109070847b13d355120344380',1,'cutlass::platform']]], + ['fetched_5fa',['fetched_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a3147da380e4c1e465aba0b965ac87ab5',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fetched_5fb',['fetched_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a837fbec1d47ae45480941de6290889c0',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fetched_5ffragment',['fetched_fragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a26aa580a2697ad02c27f868e7779348d',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['fetchedfragment',['FetchedFragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0a7f6ae85cfb162b1facf24dff8bab36',1,'cutlass::gemm::GlobalLoadStreamBase::FetchedFragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a41b45085f17532a6394de3f5ccf201e7',1,'cutlass::gemm::SharedLoadStream::FetchedFragment()']]], + ['fill',['fill',['../structcutlass_1_1PredicateVector.html#a236bd1a822479750a809452fd58dd917',1,'cutlass::PredicateVector']]], + ['fragment',['Fragment',['../structcutlass_1_1Fragment.html',1,'cutlass::Fragment< Element_, kElements_, kAlignment_ 
>'],['../structcutlass_1_1FragmentIterator.html#afd15cbe1c9a0fd7871b12f3f3042c808',1,'cutlass::FragmentIterator::Fragment()'],['../structcutlass_1_1FragmentConstIterator.html#acac5b62b365f36f370adb0fee11cea05',1,'cutlass::FragmentConstIterator::Fragment()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a32687e2aa49dfa251eab14d5cd2036be',1,'cutlass::gemm::GlobalLoadStreamBase::Fragment()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a2180cfbb482d300472ad2993e4b555d4',1,'cutlass::gemm::GemmGlobalIteratorAb::Fragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a9f025ed2609bf33230f6a390c22b11b7',1,'cutlass::gemm::SharedLoadStream::Fragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a82dc6d9a10de7aba9a69e6025b2cc2b7',1,'cutlass::gemm::HgemmSwizzle::Fragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a67693ee79f93cb61fc37f2e632eaea8d',1,'cutlass::gemm::IgemmSwizzle::Fragment()'],['../structcutlass_1_1TileIteratorBase.html#a0d7b595d7959cc1680fc07c2e02e1c8e',1,'cutlass::TileIteratorBase::Fragment()'],['../structcutlass_1_1TileLoadIterator.html#aaf72c4897641080b1d84c0bbd8d813cc',1,'cutlass::TileLoadIterator::Fragment()'],['../structcutlass_1_1TileStoreIterator.html#a95da23108b74ad085024ab45e84083e1',1,'cutlass::TileStoreIterator::Fragment()']]], + ['fragment_2eh',['fragment.h',['../fragment_8h.html',1,'']]], + ['fragment_5fa',['fragment_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a4a8c64d85aa012e3689dd024c486924b',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fragment_5fb',['fragment_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#aa28f34fb0c4bf739246d92c2fef80e0b',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fragment_20concept',['Fragment Concept',['../group__fragment__concept.html',1,'']]], + ['fragment_20iterator_20concept',['Fragment Iterator Concept',['../group__fragment__iterator__concept.html',1,'']]], + 
['fragment_5fload_5fstore_2eh',['fragment_load_store.h',['../fragment__load__store_8h.html',1,'']]], + ['fragment_5fmultiply_5fadd_2eh',['fragment_multiply_add.h',['../fragment__multiply__add_8h.html',1,'']]], + ['fragmenta',['FragmentA',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a1daf96b6d152c5cf32f248bbfd605b74',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a71aadbb130d4b1a6532c45282b37354f',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a69d387d932b628dc51c18fcc178c4914',1,'cutlass::gemm::ThreadMultiplyAdd::FragmentA()']]], + ['fragmentb',['FragmentB',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#ae79e7fc5be2f4c8d30ca83edc151f63a',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a43e278686b493d0aef943f32a9f47b9e',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a5429a730a1dea00dc4aecbe8e3ef1620',1,'cutlass::gemm::ThreadMultiplyAdd::FragmentB()']]], + ['fragmentconstiterator',['FragmentConstIterator',['../structcutlass_1_1FragmentConstIterator.html',1,'cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ 
>'],['../structcutlass_1_1TileIteratorBase.html#a25a241bbdc0b0121992019a16f1a6d60',1,'cutlass::TileIteratorBase::FragmentConstIterator()'],['../structcutlass_1_1TileLoadIterator.html#a4c7a3a4917245de8269b74bdabe16b76',1,'cutlass::TileLoadIterator::FragmentConstIterator()'],['../structcutlass_1_1TileStoreIterator.html#a48de0db7ee2ee9699b946a9d5a0364c7',1,'cutlass::TileStoreIterator::FragmentConstIterator()'],['../structcutlass_1_1FragmentConstIterator.html#ac4b6f351e6e72bed37e425f02a10c81e',1,'cutlass::FragmentConstIterator::FragmentConstIterator(OtherFragment_ &fragment, int offset=0)'],['../structcutlass_1_1FragmentConstIterator.html#a3a8fd8f13c157ed13dc93fd78036c59e',1,'cutlass::FragmentConstIterator::FragmentConstIterator(FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)']]], + ['fragmentelement',['FragmentElement',['../structcutlass_1_1TileIteratorBase.html#ac7cca14d54bf3f0749db1ffaea7c9ae7',1,'cutlass::TileIteratorBase::FragmentElement()'],['../structcutlass_1_1TileLoadIterator.html#a2edd89863b8035137ccd8dd3ad7be464',1,'cutlass::TileLoadIterator::FragmentElement()'],['../structcutlass_1_1TileStoreIterator.html#a2b13136a970fae187fcb377c9be28fac',1,'cutlass::TileStoreIterator::FragmentElement()']]], + ['fragmentiterator',['FragmentIterator',['../structcutlass_1_1FragmentIterator.html',1,'cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >'],['../structcutlass_1_1TileIteratorBase.html#a379a52ed1128fc9f93cad35d3e3233e5',1,'cutlass::TileIteratorBase::FragmentIterator()'],['../structcutlass_1_1TileLoadIterator.html#aebbe5a0996dcd362caad618e78dc2591',1,'cutlass::TileLoadIterator::FragmentIterator()'],['../structcutlass_1_1TileStoreIterator.html#a0843b2d82422e7178f324a8d3be9d705',1,'cutlass::TileStoreIterator::FragmentIterator()'],['../structcutlass_1_1FragmentIterator.html#ae1825fe3e138e2aa62d27dab2b5227b4',1,'cutlass::FragmentIterator::FragmentIterator()']]], + 
['fragmentload',['FragmentLoad',['../structcutlass_1_1FragmentLoad.html',1,'cutlass']]], + ['fragmentload_3c_20iteratorfragment_3a_3akscalar_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html',1,'cutlass']]], + ['fragmentload_3c_20iteratorfragment_3a_3akwmmamatrix_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html',1,'cutlass']]], + ['fragmentmultiplyadd',['FragmentMultiplyAdd',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html',1,'cutlass::gemm::FragmentMultiplyAdd< Scalar_ >'],['../structcutlass_1_1gemm_1_1LinearScaling.html#aa697d4eaced1ef08247aeb1fcc0f0ea8',1,'cutlass::gemm::LinearScaling::FragmentMultiplyAdd()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#af19e14a22aefd1124f7d31beec6f8c42',1,'cutlass::gemm::FragmentMultiplyAdd::FragmentMultiplyAdd()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a21f0965f6178917c7f5c6d79ed048059',1,'cutlass::gemm::FragmentMultiplyAdd< half >::FragmentMultiplyAdd()']]], + ['fragmentmultiplyadd_3c_20half_20_3e',['FragmentMultiplyAdd< half >',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html',1,'cutlass::gemm']]], + 
['fragmentshape',['FragmentShape',['../structcutlass_1_1FragmentIterator.html#a63ff1767c4923b0a2b6b64487306ed76',1,'cutlass::FragmentIterator::FragmentShape()'],['../structcutlass_1_1FragmentConstIterator.html#a880f12d0cd42cdae7ce6009d2233f577',1,'cutlass::FragmentConstIterator::FragmentShape()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#afe44fedcf24b90c0cf6ac7d1495b89e4',1,'cutlass::gemm::HgemmSwizzle::FragmentShape()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a13a3b052cd8b714471489a9cc4dc7004',1,'cutlass::gemm::IgemmSwizzle::FragmentShape()'],['../structcutlass_1_1TileIteratorBase.html#a14f4b356c9cd320e6e7b451edbf58c24',1,'cutlass::TileIteratorBase::FragmentShape()'],['../structcutlass_1_1TileLoadIterator.html#a7c27a7b0d8593b002eca186c15fdc869',1,'cutlass::TileLoadIterator::FragmentShape()'],['../structcutlass_1_1TileStoreIterator.html#a3b872e85844c9e009fa480a71a829136',1,'cutlass::TileStoreIterator::FragmentShape()']]], + ['fragmentstore',['FragmentStore',['../structcutlass_1_1FragmentStore.html',1,'cutlass']]], + ['fragmentstore_3c_20iteratorfragment_3a_3akscalar_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html',1,'cutlass']]], + ['fragmentstore_3c_20iteratorfragment_3a_3akwmmamatrix_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html',1,'cutlass']]], + 
['functor',['functor',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#afa888d993b86ed88950a9e5ab7edeb06',1,'cutlass::gemm::GemmEpilogueTraits::Params::functor()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a6c30bea1b2a1bd2e981025851d5b12d1',1,'cutlass::gemm::GemmEpilogue::Functor()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a7cdb30f17692e8fdb3dd4cf4c0b8e9ee',1,'cutlass::gemm::GemmEpilogueTraits::Functor()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a981134cf87d85aa28570a62d9e878b10',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Functor()']]] +]; diff --git a/docs/generated-html/search/all_7.html b/docs/generated-html/search/all_7.html new file mode 100644 index 00000000..88acd946 --- /dev/null +++ b/docs/generated-html/search/all_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_7.js b/docs/generated-html/search/all_7.js new file mode 100644 index 00000000..4c4dee82 --- /dev/null +++ b/docs/generated-html/search/all_7.js @@ -0,0 +1,85 @@ +var searchData= +[ + ['gcd',['gcd',['../namespacecutlass.html#a38481ebfe13bc199aa621ceecfa016b8',1,'cutlass']]], + ['gemm',['Gemm',['../structcutlass_1_1gemm_1_1Gemm.html',1,'cutlass::gemm::Gemm< GemmTraits_ >'],['../structcutlass_1_1gemm_1_1Gemm.html#a8bff0bd32aec05f8c1e282024be0bcfd',1,'cutlass::gemm::Gemm::Gemm()']]], + ['gemm_2eh',['gemm.h',['../gemm_8h.html',1,'']]], + ['gemm_5fepilogue_2eh',['gemm_epilogue.h',['../gemm__epilogue_8h.html',1,'']]], + ['gemm_5fepilogue_5ftraits_2eh',['gemm_epilogue_traits.h',['../gemm__epilogue__traits_8h.html',1,'']]], + ['gemm_5fglobal_5fstream_2eh',['gemm_global_stream.h',['../gemm__global__stream_8h.html',1,'']]], + ['gemm_5fglobal_5ftile_2eh',['gemm_global_tile.h',['../gemm__global__tile_8h.html',1,'']]], + ['gemm_5fkernel',['gemm_kernel',['../namespacecutlass_1_1gemm.html#ad9577c9086b0f7fd1202d7f8109e4439',1,'cutlass::gemm']]], + ['gemm_5foperand_2eh',['gemm_operand.h',['../gemm__operand_8h.html',1,'']]], + ['gemm_5fshared_5fstream_2eh',['gemm_shared_stream.h',['../gemm__shared__stream_8h.html',1,'']]], + ['gemm_5fshared_5ftile_2eh',['gemm_shared_tile.h',['../gemm__shared__tile_8h.html',1,'']]], + ['gemm_5ftraits_2eh',['gemm_traits.h',['../gemm__traits_8h.html',1,'']]], + ['gemmconfig',['GemmConfig',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ 
>'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a4efe5d156abca056ef8b5334fb574dd5',1,'cutlass::gemm::GemmTraits::GemmConfig()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a1597c776238f35bcb1acc0a8f8f9c118',1,'cutlass::gemm::HgemmTraitsHelper::GemmConfig()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#af10aebe7ca4e24cce435ac4cd60e7bac',1,'cutlass::gemm::IgemmTraitsHelper::GemmConfig()']]], + ['gemmconfig_3c_20double_2c_20double_2c_20double_2c_20double_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20double_2c_20double_2c_20double_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_202_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_202_2c_201_2c_202_2c_201_2c_202_20_3e',['GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20float_2c_20float_2c_20float_2c_20float_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20float_2c_20float_2c_20float_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_204_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_204_2c_201_2c_204_2c_201_2c_202_20_3e',['GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + 
['gemmconfig_3c_20half_2c_20half_2c_20half_2c_20half_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20half_2c_20half_2c_20half_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_208_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_208_2c_202_2c_208_2c_202_2c_202_20_3e',['GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20int8_5ft_2c_20int8_5ft_2c_20int8_5ft_2c_20int8_5ft_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_204_2c_202_20_3e',['GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20int8_5ft_2c_20int8_5ft_2c_20scalard_5f_2c_20scalard_5f_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_2016_2c_201_2c_204_2c_201_2c_202_20_3e',['GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmdesc',['GemmDesc',['../structcutlass_1_1gemm_1_1GemmDesc.html',1,'cutlass::gemm']]], + ['gemmepilogue',['GemmEpilogue',['../structcutlass_1_1gemm_1_1GemmEpilogue.html',1,'cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ 
>'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ab10147070c3a38fca75397f55dc51925',1,'cutlass::gemm::GemmEpilogue::GemmEpilogue()']]], + ['gemmepiloguetraits',['GemmEpilogueTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a4a0f361b5c47d0ab5f3308cd3b3b6ef6',1,'cutlass::gemm::HgemmTraitsHelper::GemmEpilogueTraits()']]], + ['gemmepiloguetraits_3c_20gemmconfig_5f_3a_3aoutputtile_2c_20gemmconfig_5f_3a_3aaccumulators_2c_20helper_5f_3a_3agloballoaditeratorc_2c_20helper_5f_3a_3aglobaltransformerc_2c_20helper_5f_3a_3aglobaltransformerd_2c_20helper_5f_3a_3aglobalstoreiteratord_2c_20helper_5f_3a_3asharedstoreiteratord_2c_20helper_5f_3a_3asharedstoretransformerd_2c_20helper_5f_3a_3asharedloaditeratord_2c_20helper_5f_3a_3aiterations_2c_20helper_5f_3a_3adelta_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm']]], + 
['gemmepiloguetraits_3c_20igemmconfig_5f_3a_3aoutputtile_2c_20igemmconfig_5f_3a_3aaccumulators_2c_20helper_5f_3a_3agloballoaditeratorc_2c_20helper_5f_3a_3aglobaltransformerc_2c_20helper_5f_3a_3aglobaltransformerd_2c_20helper_5f_3a_3aglobalstoreiteratord_2c_20helper_5f_3a_3asharedstoreiteratord_2c_20helper_5f_3a_3asharedstoretransformerd_2c_20helper_5f_3a_3asharedloaditeratord_2c_20helper_5f_3a_3aiterations_2c_20helper_5f_3a_3adelta_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['gemmepiloguetraitshelper',['GemmEpilogueTraitsHelper',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + ['gemmepiloguetraitshelper_3c_20igemmconfig_5f_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + ['gemmglobaliteratorab',['GemmGlobalIteratorAb',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html',1,'cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a34cb153d311377388e7819296a84d07e',1,'cutlass::gemm::GemmGlobalIteratorAb::GemmGlobalIteratorAb()']]], + ['gemmglobaliteratorcd',['GemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html',1,'cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ 
>'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6dae81995ab94c0b7f28eeeeb84a6c8d',1,'cutlass::gemm::GemmGlobalIteratorCd::GemmGlobalIteratorCd()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a64f1df43acb37a1901f0b55becaa9557',1,'cutlass::gemm::GemmGlobalIteratorCd::GemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int offset=0, int pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())']]], + ['gemmglobaltilecdtraits',['GemmGlobalTileCdTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html',1,'cutlass::gemm']]], + ['gemmglobaltiletraits',['GemmGlobalTileTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html',1,'cutlass::gemm']]], + ['gemmglobaltiletraits_3c_20gemmoperand_3a_3akc_2c_20matrixlayout_3a_3akcolumnmajor_2c_20scalar_5f_2c_20tile_5f_2c_20threads_5f_2c_20kaccesssize_5f_20_3e',['GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html',1,'cutlass::gemm']]], + ['gemmmultiplicandtraits',['GemmMultiplicandTraits',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html',1,'cutlass::gemm']]], + ['gemmoperand',['GemmOperand',['../structcutlass_1_1GemmOperand.html',1,'cutlass']]], + ['gemmoperandtraitsab',['GemmOperandTraitsAb',['../structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html',1,'cutlass::gemm']]], + ['gemmsharedloadtileatraits',['GemmSharedLoadTileATraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html',1,'cutlass::gemm']]], + ['gemmsharedloadtilebtraits',['GemmSharedLoadTileBTraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html',1,'cutlass::gemm']]], + ['gemmsharedloadtiledtraits',['GemmSharedLoadTileDTraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html',1,'cutlass::gemm']]], + 
['gemmsharedstoretileabtraits',['GemmSharedStoreTileAbTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html',1,'cutlass::gemm']]], + ['gemmsharedstoretiledtraits',['GemmSharedStoreTileDTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html',1,'cutlass::gemm']]], + ['gemmsharedstorewithskewtileabtraits',['GemmSharedStoreWithSkewTileAbTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera',['GemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html',1,'cutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ >'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a5557c86a530f5d20a35d3fa620adf417',1,'cutlass::gemm::HgemmTraitsHelper::GemmTileTraitsHelperA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ab9e10d54c81a359db0eba58a11b9a0cf',1,'cutlass::gemm::IgemmTraitsHelper::GemmTileTraitsHelperA()']]], + ['gemmtiletraitshelpera_3c_20klayout_5f_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< kLayout_, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb',['GemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html',1,'cutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ 
>'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a8768c2b03bea0c3601c47dde2bc7ca89',1,'cutlass::gemm::HgemmTraitsHelper::GemmTileTraitsHelperB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a095505bfcea6791accd06bf4d37b9df8',1,'cutlass::gemm::IgemmTraitsHelper::GemmTileTraitsHelperB()']]], + ['gemmtiletraitshelperb_3c_20klayout_5f_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< kLayout_, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtraits',['GemmTraits',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + ['gemmtraits_3c_20gemmconfig_5f_2c_20helper_5f_3a_3agloballoadstreama_2c_20helper_5f_3a_3agloballoadstreamb_2c_20helper_5f_3a_3asharedloadstreama_2c_20helper_5f_3a_3asharedloadstreamb_2c_20epilogue_5f_2c_20identityblockswizzle_2c_20index_5f_2c_20clearaccumulators_3c_20gemmconfig_5f_3a_3aaccumulators_3a_3aelement_20_3e_20_3e',['GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + 
['gemmtraits_3c_20gemmconfig_5f_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3agloballoadstreama_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3agloballoadstreamb_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3asharedloadstreama_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3asharedloadstreamb_2c_20gemmepilogue_3c_20gemmepiloguetraits_5f_20_3e_2c_20identityblockswizzle_2c_20index_5f_2c_20clearaccumulators_3c_20gemmconfig_5f_3a_3aaccumulators_3a_3aelement_20_3e_20_3e',['GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + 
['gemmtraits_3c_20helper_5f_3a_3agemmconfig_2c_20helper_5f_3a_3agloballoadstreama_2c_20helper_5f_3a_3agloballoadstreamb_2c_20helper_5f_3a_3asharedloadstreama_2c_20helper_5f_3a_3asharedloadstreamb_2c_20helper_5f_3a_3aepilogue_2c_20identityblockswizzle_2c_20index_5f_2c_20helper_5f_3a_3aclearaccumulators_20_3e',['GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + ['get',['get',['../classcutlass_1_1PredicateVector_1_1Iterator.html#af035589126434bd2dbef4000cd864b8b',1,'cutlass::PredicateVector::Iterator::get()'],['../structcutlass_1_1ComputeOffsetFromShape.html#a3c6f60a59178ffb84899aa449bd51d38',1,'cutlass::ComputeOffsetFromShape::get()'],['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html#a5198e838e3892245fe7b10884555ec93',1,'cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >::get()'],['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html#a11bf40abc57580db5ce4b0fd4c3e55ff',1,'cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >::get()'],['../structcutlass_1_1ComputeOffsetFromStrides.html#af5e46bc2b325cb6952d2d68c8aca1409',1,'cutlass::ComputeOffsetFromStrides::get()'],['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html#acdbb9c7cdf9fc054656614f72396434e',1,'cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >::get()'],['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html#a512a9d46f6bea9d85641d7263bcfee36',1,'cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > 
>::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides.html#a1744bfe277cbe0c642cce4a48c1dd9ad',1,'cutlass::ComputeThreadOffsetFromStrides::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html#a5d446b2663c01362361e09435a726996',1,'cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html#a6e621f5fae2ba29277fde46be1cede24',1,'cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >::get()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a2e7c14b8a118f81c1df46ea5045e297b',1,'cutlass::platform::unique_ptr::get()']]], + ['get_5fcoord_5fdhw',['get_Coord_dhw',['../namespacecutlass.html#a4680709eeeb679ef0219938f85f7394e',1,'cutlass']]], + ['get_5fcoord_5fhw',['get_Coord_hw',['../namespacecutlass.html#a7d2ab683e29b47d245e183ad5aeb962e',1,'cutlass::get_Coord_hw(Coord< 3 > const &coord)'],['../namespacecutlass.html#a082e7a2e4acc2879468243f5732ccf0b',1,'cutlass::get_Coord_hw(Coord< 4 > const &coord)']]], + ['get_5fcoord_5fhwc',['get_Coord_hwc',['../namespacecutlass.html#a71f3e2a12b9e98be1fba082610fa9d4f',1,'cutlass']]], + ['get_5fdeleter',['get_deleter',['../classcutlass_1_1platform_1_1unique__ptr.html#a5b8d8ecafb4da336acd50e40cd42b6e0',1,'cutlass::platform::unique_ptr::get_deleter() noexcept'],['../classcutlass_1_1platform_1_1unique__ptr.html#aa427ab4ea4f2336ac6db28d53a4c11ac',1,'cutlass::platform::unique_ptr::get_deleter() const noexcept']]], + ['getextent',['GetExtent',['../structcutlass_1_1gemm_1_1GetExtent.html',1,'cutlass::gemm']]], + ['getextent_3c_20gemmoperand_3a_3aka_2c_20tile_5f_20_3e',['GetExtent< GemmOperand::kA, Tile_ >',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html',1,'cutlass::gemm']]], + 
['getextent_3c_20gemmoperand_3a_3akb_2c_20tile_5f_20_3e',['GetExtent< GemmOperand::kB, Tile_ >',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html',1,'cutlass::gemm']]], + ['global',['global',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html#a3c2980547310ec4307f3a5f9817dfc51',1,'cutlass::gemm::GemmTraits::StreamSharedStorage']]], + ['global_5fstream_5fa',['global_stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a575bcff901d69ae3f46987222f23ab64',1,'cutlass::gemm::GemmTraits::Params']]], + ['global_5fstream_5fb',['global_stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a46affe35cb16874de5a2b9777aedf596',1,'cutlass::gemm::GemmTraits::Params']]], + ['globalfragmentc',['GlobalFragmentC',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad8e5337f3d19437e9c4cafcfcc3e3d3e',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['globalfragmentd',['GlobalFragmentD',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a723cd69ee4d5c26579b36e02c531ea88',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['globaliterator',['GlobalIterator',['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a56d3f2606f9464ec57aa61aae378c642',1,'cutlass::gemm::HgemmSwizzle::GlobalIterator()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a880878914c25db44a1781725c24af514',1,'cutlass::gemm::IgemmSwizzle::GlobalIterator()']]], + ['globalloaditeratora',['GlobalLoadIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a5687850f235d644a4820851880740d27',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ab8a3def34300afb5745453d0b33204aa',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ac7ee33e683e48511a1a220df6c9d4758',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadIteratorA()']]], + 
['globalloaditeratorb',['GlobalLoadIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a362794738bc14b283a91558bcadbbfd5',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a95559f28cab076da723e4cb24351116e',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a3a6d816852cca926afa08103f754477b',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadIteratorB()']]], + ['globalloaditeratorc',['GlobalLoadIteratorC',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#aecb5429363c7156ee3ad596fe250120a',1,'cutlass::gemm::GemmEpilogue::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a8409d84ee282a4d6953bd41149d8b9c2',1,'cutlass::gemm::GemmEpilogueTraits::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aeea13630bb281834b717f8d9d13a9319',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a24826f99d097eea0298e6be12a6327b9',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalLoadIteratorC()']]], + ['globalloadstream',['GlobalLoadStream',['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream'],['../structcutlass_1_1gemm_1_1GlobalLoadStream.html',1,'cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >'],['../structcutlass_1_1gemm_1_1GlobalLoadStream.html#a4dd11a75375b6b9d7b8dcbd4d402d8d6',1,'cutlass::gemm::GlobalLoadStream::GlobalLoadStream()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#ab2961b4db0694cf128d55d38a98db575',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::GlobalLoadStream()']]], + 
['globalloadstreama',['GlobalLoadStreamA',['../structcutlass_1_1gemm_1_1GemmTraits.html#a9cd6c3fddfb4315eb52b672900462c47',1,'cutlass::gemm::GemmTraits::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a448c242880183e006b70d839d210a2ec',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a2aaece6093100c71c4d587994200e3bb',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a7fb1354154f303642da72e6fd157d846',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadStreamA()']]], + ['globalloadstreamb',['GlobalLoadStreamB',['../structcutlass_1_1gemm_1_1GemmTraits.html#ac393b07e780629fc8254fc22cc6f815b',1,'cutlass::gemm::GemmTraits::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#aad467ed9a680b4d77acecb096799cd89',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#abaf5f16ab0b215b406766ecadab29394',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a88e66ee760aea03687e7b3ccc6ea535b',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadStreamB()']]], + ['globalloadstreambase',['GlobalLoadStreamBase',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html',1,'cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0fdc0f56d1352b5ad41fd4985edd3278',1,'cutlass::gemm::GlobalLoadStreamBase::GlobalLoadStreamBase()']]], + 
['globalloadtiletraits',['GlobalLoadTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a94f00f94a88588522ca3f9f0197a5a9b',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalLoadTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#aaa009025dcd6360ead1dc18005688821',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalLoadTileTraits()']]], + ['globalstoreiteratord',['GlobalStoreIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a1c766374d900535c944cf2a2de6925f4',1,'cutlass::gemm::GemmEpilogue::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#aeef5745d149770c9f79e12f6d97ffce1',1,'cutlass::gemm::GemmEpilogueTraits::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a23be7b4b498c17f9235a2b4896f1bffb',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad3e937c15bfac443b0e3b94d702f46b2',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalStoreIteratorD()']]], + ['globalstoretiletraits',['GlobalStoreTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a16d7df2934c3c59d9b8f36f7a2137aee',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a16b06a1611dbd22adaa0c9ee5e1b15bd',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalStoreTileTraits()']]], + ['globaltiletraits',['GlobalTileTraits',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#adc95f4a8617cdf28e5b5d7d2d1aefec2',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a8160a260acce2362e90d43bce733c69d',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ 
>::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a5fee0ed52326c0685e8d8295e40ce064',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#afbc41e7b98097b153fd27a48f073a877',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a36e082b2da22d17eeb73af6bd0632314',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a1e6356bf5c87271ab9794fcc79edc145',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a738774d1eb79de7e29c372ddfd48258d',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a24f38105e3c331c733cb672c3a9be588',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()']]], + 
['globaltransformera',['GlobalTransformerA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#af9a98d39d6959a9641f7c3c90df2f98e',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalTransformerA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a3fb86b6d3e353df6b752510d64c5e647',1,'cutlass::gemm::HgemmTraitsHelper::GlobalTransformerA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a23bb732b7237bcabe3667408f288844d',1,'cutlass::gemm::IgemmTraitsHelper::GlobalTransformerA()']]], + ['globaltransformerb',['GlobalTransformerB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a437070ba4a214aee363315d6019e450c',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalTransformerB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a7b4de712868095200a338802c1fbb3de',1,'cutlass::gemm::HgemmTraitsHelper::GlobalTransformerB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a600bcc571ea5e04a98663c134d4664b9',1,'cutlass::gemm::IgemmTraitsHelper::GlobalTransformerB()']]], + ['globaltransformerc',['GlobalTransformerC',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a41edfd24b7dd2759f8b72ae8534182a9',1,'cutlass::gemm::GemmEpilogue::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a051f25a4aa3ea71ff400582228adbdaa',1,'cutlass::gemm::GemmEpilogueTraits::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a0682b61d1a1a951026ff026bff9361bb',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad0116b2e7b2ca1526246e2ff7e73fd2f',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalTransformerC()']]], + 
['globaltransformerd',['GlobalTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a32f618ff19d984447fba7355d46a69a7',1,'cutlass::gemm::GemmEpilogue::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a261e526c6a8e832bc483bf4e486cc9d7',1,'cutlass::gemm::GemmEpilogueTraits::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ae96c5a3d58dc7a95543f8749f762ca43',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a880293ef6a48a0f4941c8f984c36f591',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalTransformerD()']]], + ['good',['good',['../classcutlass_1_1TensorRef.html#a0c049e523ee0fc98769ed8cd2d026780',1,'cutlass::TensorRef::good()'],['../classcutlass_1_1TensorView.html#a837881bc82704491accf54aad2b9def9',1,'cutlass::TensorView::good()']]], + ['greater',['greater',['../structcutlass_1_1platform_1_1greater.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/all_8.html b/docs/generated-html/search/all_8.html new file mode 100644 index 00000000..b74d5fd8 --- /dev/null +++ b/docs/generated-html/search/all_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_8.js b/docs/generated-html/search/all_8.js new file mode 100644 index 00000000..684d3e47 --- /dev/null +++ b/docs/generated-html/search/all_8.js @@ -0,0 +1,22 @@ +var searchData= +[ + ['hgemm_5fglobal_5ftile_2eh',['hgemm_global_tile.h',['../hgemm__global__tile_8h.html',1,'']]], + ['hgemm_5fmultiply_5fadd_2eh',['hgemm_multiply_add.h',['../hgemm__multiply__add_8h.html',1,'']]], + ['hgemm_5fswizzle_2eh',['hgemm_swizzle.h',['../hgemm__swizzle_8h.html',1,'']]], + ['hgemm_5ftraits_2eh',['hgemm_traits.h',['../hgemm__traits_8h.html',1,'']]], + ['hgemmconfig',['HgemmConfig',['../structcutlass_1_1gemm_1_1HgemmConfig.html',1,'cutlass::gemm']]], + ['hgemmcrosswiseglobaltiletraits',['HgemmCrosswiseGlobalTileTraits',['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html',1,'cutlass::gemm']]], + ['hgemmswizzle',['HgemmSwizzle',['../structcutlass_1_1gemm_1_1HgemmSwizzle.html',1,'cutlass::gemm::HgemmSwizzle< GlobalIterator_ >'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ac3c52e0fee9b37a3dfc39ca168a63d36',1,'cutlass::gemm::HgemmSwizzle::HgemmSwizzle()']]], + ['hgemmtiletraitshelpera',['HgemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelpera_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelperb',['HgemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelperb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + 
['hgemmtraits',['HgemmTraits',['../structcutlass_1_1gemm_1_1HgemmTraits.html',1,'cutlass::gemm']]], + ['hgemmtraitshelper',['HgemmTraitsHelper',['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html',1,'cutlass::gemm']]], + ['hgemmtransformera',['HgemmTransformerA',['../structcutlass_1_1gemm_1_1HgemmTransformerA.html',1,'cutlass::gemm']]], + ['hgemmtransformera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformera_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformerb',['HgemmTransformerB',['../structcutlass_1_1gemm_1_1HgemmTransformerB.html',1,'cutlass::gemm']]], + ['hgemmtransformerb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformerb_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/all_9.html b/docs/generated-html/search/all_9.html new file mode 100644 index 00000000..95e88dd2 --- /dev/null +++ b/docs/generated-html/search/all_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_9.js b/docs/generated-html/search/all_9.js new file mode 100644 index 00000000..fe5b5cd3 --- /dev/null +++ b/docs/generated-html/search/all_9.js @@ -0,0 +1,107 @@ +var searchData= +[ + ['identity',['Identity',['../structcutlass_1_1Identity.html',1,'cutlass']]], + ['identity_5fblock_5fswizzle_2eh',['identity_block_swizzle.h',['../identity__block__swizzle_8h.html',1,'']]], + ['identityblockswizzle',['IdentityBlockSwizzle',['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html',1,'cutlass::gemm::IdentityBlockSwizzle'],['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html#abfde9b316173b1c0b8622cf22ffb6d68',1,'cutlass::gemm::IdentityBlockSwizzle::IdentityBlockSwizzle()']]], + ['idx',['idx',['../structcutlass_1_1Coord.html#a50de265129f1db7bdf2f0aefbc6a46bc',1,'cutlass::Coord']]], + ['igemm_5fepilogue_2eh',['igemm_epilogue.h',['../igemm__epilogue_8h.html',1,'']]], + ['igemm_5fglobal_5ftile_2eh',['igemm_global_tile.h',['../igemm__global__tile_8h.html',1,'']]], + ['igemm_5fmultiply_5fadd_2eh',['igemm_multiply_add.h',['../igemm__multiply__add_8h.html',1,'']]], + ['igemm_5fswizzle_2eh',['igemm_swizzle.h',['../igemm__swizzle_8h.html',1,'']]], + ['igemm_5ftraits_2eh',['igemm_traits.h',['../igemm__traits_8h.html',1,'']]], + ['igemmconfig',['IgemmConfig',['../structcutlass_1_1gemm_1_1IgemmConfig.html',1,'cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ >'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a5a52727bb9b5d5f8afa7d0384f564036',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::IgemmConfig()']]], + ['igemmconfig_3c_20outputtile_5f_2c_20int8_5ft_2c_20accumulatorsperthread_5f_20_3e',['IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >',['../structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.html',1,'cutlass::gemm']]], + 
['igemmcontiguousglobaltiletraits',['IgemmContiguousGlobalTileTraits',['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html',1,'cutlass::gemm']]], + ['igemmepilogue',['IgemmEpilogue',['../structcutlass_1_1gemm_1_1IgemmEpilogue.html',1,'cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >'],['../structcutlass_1_1gemm_1_1IgemmEpilogue.html#ab7a51121d24250d6441ee538e6521dc2',1,'cutlass::gemm::IgemmEpilogue::IgemmEpilogue()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html#a49ac00bed1532707aacd3ff108c84623',1,'cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >::IgemmEpilogue()']]], + ['igemmepilogue_3c_20gemmepiloguetraits_5f_2c_20true_20_3e',['IgemmEpilogue< GemmEpilogueTraits_, true >',['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html',1,'cutlass::gemm']]], + ['igemmepiloguescalar',['IgemmEpilogueScalar',['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html',1,'cutlass::gemm']]], + ['igemmepiloguescalar_3c_20int_20_3e',['IgemmEpilogueScalar< int >',['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html',1,'cutlass::gemm']]], + ['igemmepiloguetraits',['IgemmEpilogueTraits',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['igemmepiloguetraitshelper',['IgemmEpilogueTraitsHelper',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + ['igemmfloattoint8converter',['IgemmFloatToInt8Converter',['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html',1,'cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#ac65f020e93584b1bd3cdb849ff625026',1,'cutlass::gemm::IgemmFloatToInt8Converter::IgemmFloatToInt8Converter()']]], + ['igemmgloballoadtransformer',['IgemmGlobalLoadTransformer',['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html',1,'cutlass::gemm']]], + 
['igemmgloballoadtransformer_3c_20fragment_3c_20int8_5ft_2c_20kelements_5f_20_3e_2c_20float_20_3e',['IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >',['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html',1,'cutlass::gemm']]], + ['igemmglobalstoretransformer',['IgemmGlobalStoreTransformer',['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html',1,'cutlass::gemm']]], + ['igemmglobalstoretransformer_3c_20float_2c_20fragment_3c_20int8_5ft_2c_20kelements_5f_20_3e_20_3e',['IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >',['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html',1,'cutlass::gemm']]], + ['igemmint8tofloatconverter',['IgemmInt8ToFloatConverter',['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html',1,'cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a88a55a494d3a30d50477d50bf6a8804d',1,'cutlass::gemm::IgemmInt8ToFloatConverter::IgemmInt8ToFloatConverter()']]], + ['igemmsharedstoretransformer',['IgemmSharedStoreTransformer',['../structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html',1,'cutlass::gemm']]], + ['igemmswizzle',['IgemmSwizzle',['../structcutlass_1_1gemm_1_1IgemmSwizzle.html',1,'cutlass::gemm::IgemmSwizzle< GlobalIterator_ >'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#ac041d287c966cf568599d7e462e81d5a',1,'cutlass::gemm::IgemmSwizzle::IgemmSwizzle()']]], + ['igemmtiletraitshelpera',['IgemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['igemmtiletraitshelpera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + 
['igemmtiletraitshelperb',['IgemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['igemmtiletraitshelperb_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['igemmtraits',['IgemmTraits',['../structcutlass_1_1gemm_1_1IgemmTraits.html',1,'cutlass::gemm']]], + ['igemmtraitshelper',['IgemmTraitsHelper',['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html',1,'cutlass::gemm']]], + ['igemmtransformera',['IgemmTransformerA',['../structcutlass_1_1gemm_1_1IgemmTransformerA.html',1,'cutlass::gemm']]], + ['igemmtransformera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformera_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformerb',['IgemmTransformerB',['../structcutlass_1_1gemm_1_1IgemmTransformerB.html',1,'cutlass::gemm']]], + ['igemmtransformerb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformerb_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + 
['immediateoffsetstrides',['ImmediateOffsetStrides',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#abc47717230ddde3edc88d2770f6841bf',1,'cutlass::gemm::GemmGlobalTileTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a14e9713b0cd34af433c3cae9b283b54c',1,'cutlass::gemm::GemmGlobalTileCdTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a027bebceeda2287b40915ffd95d494a7',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a39414f484da7f993bc96d61c97273614',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a8e767b5e2fb95b0b02a0ea3e8ea58368',1,'cutlass::gemm::GemmSharedLoadTileATraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a5e4204b52ee081a37e824ca71c291c03',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ac585815d08290d9a5a9cdbd611ffdac4',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a9cfb32f902593e7dc018ee802c3520b8',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af53d49bad7060b87a2761fe8a82a7ddd',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::ImmediateOffsetStrides()'],['../structcutlass_1_1TileIteratorBase.html#a561ceb1093b28b8dce67df0129b7b8b8',1,'cutlass::TileIteratorBase::ImmediateOffsetStrides()']]], + 
['inc_5fadvance',['inc_advance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a8c2618ac16362a8362dcddeed71c41d4',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a857db0c999250248b104f17f13fe9bd8',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::inc_advance()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a1187258cd4068a627e73bee0302f1fc2',1,'cutlass::TileIteratorBase::Params::inc_advance()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a9dea455aa86bb59517b4a4d0309e424b',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_advance()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ab4b8150f19c9f8649d75c69ec0a76e1a',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a174ae7d8aa0664eaf1d6f63c5606baa0',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_advance()'],['../structcutlass_1_1TileLoadIterator.html#a91e13a7aad4b0acac002b6dd125abc37',1,'cutlass::TileLoadIterator::inc_advance()'],['../structcutlass_1_1TileStoreIterator.html#a1614b27755cf82c0e1f3e7852c5a4c75',1,'cutlass::TileStoreIterator::inc_advance()']]], + ['inc_5fc',['inc_c',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a12ead84ea9634e963d10c6df7b7792c9',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_c()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a44287250bf5631a490b514859fd101d1',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_c()']]], + 
['inc_5fd',['inc_d',['../structcutlass_1_1TileIteratorBase_1_1Params.html#af95fa1b5102176a0fa9b17713fd48150',1,'cutlass::TileIteratorBase::Params::inc_d()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a1e42503e5a54cdc01308e9030aebdd35',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_d()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ad26ab8d8010c9a1d7f3b91f60940b460',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_d()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab1ebbe54e4315ac07daf260a88f41d04',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_d()'],['../structcutlass_1_1TileLoadIterator.html#a0a93f37fd366a48c4ed6cc39aa850eb5',1,'cutlass::TileLoadIterator::inc_d()'],['../structcutlass_1_1TileStoreIterator.html#a74dffe1ddcc84935ab170117e939b7e3',1,'cutlass::TileStoreIterator::inc_d()']]], + ['inc_5fh',['inc_h',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#aed94505e5a269d5f33499e71284104f5',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a6306f771718c0c05276e103f30f862b2',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::inc_h()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#aea591d4278a8338ae8b50fa0b8f3a366',1,'cutlass::TileIteratorBase::Params::inc_h()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#aa24336597f4a3316d94df6ab0c20f714',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_h()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ae07fa10a53d44471a04275145201299e',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa9a733f35e9be67663c9c8f80b0034d4',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_h()'],['../structcutlass_1_1TileLoadIterator.html#a228a95cf2c9c6089287984fcbf5cface',1,'cutlass::TileLoadIterator::inc_h()'],['../structcutlass_1_1TileStoreIterator.html#a3793f5d5846862f22f1de736e36ae7c1',1,'cutlass::TileStoreIterator::inc_h()']]], + 
['inc_5fstage',['inc_stage',['../structcutlass_1_1gemm_1_1SharedLoadStream.html#acf22fd09aa537943c16b900d66f1ec6f',1,'cutlass::gemm::SharedLoadStream::inc_stage()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a8851150a49e4a9c135279c8c9dfdc592',1,'cutlass::gemm::GemmTraits::SharedLoadStream::inc_stage()'],['../structcutlass_1_1TileLoadIterator.html#aeb3faf5e8f976f5a4d158ceb41a1cc64',1,'cutlass::TileLoadIterator::inc_stage()'],['../structcutlass_1_1TileStoreIterator.html#a187e0852ec4862f6d3cb6249bedc3bb3',1,'cutlass::TileStoreIterator::inc_stage()']]], + ['inc_5fw',['inc_w',['../structcutlass_1_1TileIteratorBase_1_1Params.html#ac6e81450a2d78555a6c2415dcc42b178',1,'cutlass::TileIteratorBase::Params::inc_w()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a622a4dd27162854ec96efea93cdd4380',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_w()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aec2d692967d9be5d42673dfde21f5427',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_w()'],['../structcutlass_1_1TileLoadIterator.html#a49cf3ee608debebf451cdd8c2125d073',1,'cutlass::TileLoadIterator::inc_w()'],['../structcutlass_1_1TileStoreIterator.html#aa573a47a9ffc3e07239a09e2bc470cf1',1,'cutlass::TileStoreIterator::inc_w()']]], + 
['index',['Index',['../structcutlass_1_1gemm_1_1Gemm.html#a0aca711d07245f3071adeb1111fedd34',1,'cutlass::gemm::Gemm::Index()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a07c93d583bfddd8f916fba6ef809832e',1,'cutlass::gemm::GemmEpilogue::Index()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#ab430d05bd17efd60c28077c87b5ca331',1,'cutlass::gemm::GemmEpilogueTraits::Index()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a6a6e38022606dd8d41cf7264fb059cc2',1,'cutlass::gemm::GlobalLoadStreamBase::Index()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a7ff9cae930c8a6bb9c8ee6d81cb1953f',1,'cutlass::gemm::GemmGlobalIteratorAb::Index()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a56847e834b31b88544093c3df54d299f',1,'cutlass::gemm::GemmGlobalIteratorCd::Index()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#ae67227cecbe84f5c8497d9a7ff82b367',1,'cutlass::gemm::GemmTraits::Index()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a3f45216454a550a116935aede0bda3de',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Index()'],['../structcutlass_1_1TileIteratorBase.html#a44665808adfd69df0d26cec4b1840cc3',1,'cutlass::TileIteratorBase::Index()'],['../structcutlass_1_1TileLoadIterator.html#aaa83f05e0cb3204053c3ee1da036cd36',1,'cutlass::TileLoadIterator::Index()'],['../structcutlass_1_1TileStoreIterator.html#a5ac2280dfcac08cec17b8c0db1c4593e',1,'cutlass::TileStoreIterator::Index()']]], + 
['initialize',['initialize',['../structcutlass_1_1gemm_1_1Gemm_1_1Params.html#ac00c9d78a187d9c7d53399f971c0e129',1,'cutlass::gemm::Gemm::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a3e9d0fd2989fea776b0cab0e0f2813ce',1,'cutlass::gemm::GemmEpilogueTraits::Params::initialize()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a7c7e448384156c801ed362359a1a6a40',1,'cutlass::gemm::GlobalLoadStreamBase::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html#a73091e07b6d4c99f6e0319fbf6bd1709',1,'cutlass::gemm::GemmGlobalIteratorAb::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#af5a496f1b6a46ea6a9894512029add6a',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::initialize()'],['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html#adb66103b905b35a1594c6f0bab65758a',1,'cutlass::gemm::SharedLoadStream::Params::initialize()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a8e4d277325bb5e56c718a2298b60d3cf',1,'cutlass::gemm::SharedLoadStream::initialize()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a40023f0ffdd8bee4ccbcaac28222e983',1,'cutlass::gemm::GemmTraits::Params::initialize()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a4946e45e10661307f562b27bad5cb72d',1,'cutlass::gemm::LinearScaling::Params::initialize()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#ad6b65c5f3ed7cd9e7ffeb684cbf30d04',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::initialize()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#ad2631ffcc963638aa5b016c66a2e2c55',1,'cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a3ba93370bd4b2ede4bd4eb97ac0881be',1,'cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, 
Index _stride_w)'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#af496afebb8983e5d346c681334955224',1,'cutlass::TileIteratorBase::Params::initialize()'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aeeea0f8bdee876553a4908b9b7cbaf76',1,'cutlass::TileLoadIterator::Params::initialize(SharedStorage const &storage)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#afd9e82df76ad35fe883b7834457242b2',1,'cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aa3922946bb0da0c0040dec44aa389ec1',1,'cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aebaecd0f971245ffc5a50fe5f7a9b4e8',1,'cutlass::TileLoadIterator::Params::initialize()'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#a71f5238a712f7b2f377fb58938ac829b',1,'cutlass::TileStoreIterator::Params::initialize(SharedStorage &storage)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#af0d26a2df2a1a5ba3c3169b736bd5d43',1,'cutlass::TileStoreIterator::Params::initialize(Scalar *ptr, Index stride_d, Index stride_h, Index stride_w)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#ac1cfe92f1543ba445fa10f1859a0db98',1,'cutlass::TileStoreIterator::Params::initialize(Scalar *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#af884f720d36aa82e7f972932686ae986',1,'cutlass::TileStoreIterator::Params::initialize()']]], + 
['initialize_5fpredicates',['initialize_predicates',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ab9375d9e779dcda79a5cd561bb3762ff',1,'cutlass::gemm::GemmGlobalIteratorAb::initialize_predicates()'],['../structcutlass_1_1TileIteratorBase.html#a78b6c0d6a1a96dd55a34bc302ecb07d7',1,'cutlass::TileIteratorBase::initialize_predicates()'],['../structcutlass_1_1TileLoadIterator.html#a8291a51bf96f86bc77d0e3453345dbd5',1,'cutlass::TileLoadIterator::initialize_predicates()'],['../structcutlass_1_1TileStoreIterator.html#af92ba20db048a9ec96976a1673f0f7c2',1,'cutlass::TileStoreIterator::initialize_predicates()']]], + ['inputfragment',['InputFragment',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#ac7906301019c3e6d60985c3851f1e95e',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::InputFragment()'],['../structcutlass_1_1Copy.html#aed254bbc1ad94ed9d335ab02f199ceb1',1,'cutlass::Copy::InputFragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ab5fab63d83eb0444c08bda16491d2627',1,'cutlass::gemm::HgemmSwizzle::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#aa9a4b05f9fc28b80a4ae4aabb2ce1e8c',1,'cutlass::gemm::IgemmFloatToInt8Converter::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a702ca51abc077355a2d7343976a0cfdb',1,'cutlass::gemm::IgemmInt8ToFloatConverter::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a24a0bd5a9251ba5204b35eb4c4ac7727',1,'cutlass::gemm::IgemmSwizzle::InputFragment()']]], + ['instructionshape',['InstructionShape',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#ac93ba536992debeae86087e638167a13',1,'cutlass::gemm::FragmentMultiplyAdd::InstructionShape()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#ab16a3d8adda89cc4f9765116ea75a4b7',1,'cutlass::gemm::FragmentMultiplyAdd< half 
>::InstructionShape()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a3a57d05f50932d718538f0d1ededa95b',1,'cutlass::gemm::GemmConfig::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#aa56cdefa659af5ce4efd493b94bafdfd',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#ad73372a37315b0c17a8db21e40a78574',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ac6381210d447fda9b0e9a028d167f22b',1,'cutlass::gemm::ThreadMultiplyAdd::InstructionShape()']]], + ['integral_5fconstant',['integral_constant',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_20v_20_3e',['integral_constant< bool, V >',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5farithmetic_3c_20t_20_3e_3a_3avalue_7c_7cis_5fvoid_3c_20t_20_3e_3a_3avalue_7c_7cis_5fsame_3c_20nullptr_5ft_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5fbase_5fof_5fhelper_3c_20remove_5fcv_3c_20baset_20_3e_3a_3atype_2c_20remove_5fcv_3c_20derivedt_20_3e_3a_3atype_20_3e_3a_3avalue_29_7c_7c_28is_5fsame_3c_20remove_5fcv_3c_20baset_20_3e_3a_3atype_2c_20remove_5fcv_3c_20derivedt_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_base_of_helper< remove_cv< BaseT >::type, 
remove_cv< DerivedT >::type >::value)||(is_same< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5ffundamental_3c_20t_20_3e_3a_3avalue_7c_7cis_5fpointer_3c_20t_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5fintegral_3c_20t_20_3e_3a_3avalue_7c_7cis_5ffloating_5fpoint_3c_20t_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5fsame_3c_20float_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_7c_7cis_5fsame_3c_20double_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28n_20_26_28n_20_2d_201_29_29_3d_3d0_20_3e',['integral_constant< bool,(N &(N - 1))==0 >',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['is_5farithmetic',['is_arithmetic',['../structcutlass_1_1platform_1_1is__arithmetic.html',1,'cutlass::platform']]], + ['is_5fbase_5fof',['is_base_of',['../structcutlass_1_1platform_1_1is__base__of.html',1,'cutlass::platform']]], + ['is_5fbase_5fof_5fhelper',['is_base_of_helper',['../structcutlass_1_1platform_1_1is__base__of__helper.html',1,'cutlass::platform']]], + ['is_5ffloating_5fpoint',['is_floating_point',['../structcutlass_1_1platform_1_1is__floating__point.html',1,'cutlass::platform']]], + 
['is_5ffundamental',['is_fundamental',['../structcutlass_1_1platform_1_1is__fundamental.html',1,'cutlass::platform']]], + ['is_5fintegral',['is_integral',['../structcutlass_1_1platform_1_1is__integral.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20char_20_3e',['is_integral< char >',['../structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20const_20t_20_3e',['is_integral< const T >',['../structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20const_20volatile_20t_20_3e',['is_integral< const volatile T >',['../structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20int_20_3e',['is_integral< int >',['../structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20long_20_3e',['is_integral< long >',['../structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20long_20long_20_3e',['is_integral< long long >',['../structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20short_20_3e',['is_integral< short >',['../structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20signed_20char_20_3e',['is_integral< signed char >',['../structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20char_20_3e',['is_integral< unsigned char >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20int_20_3e',['is_integral< unsigned int >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20long_20_3e',['is_integral< unsigned long 
>',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20long_20long_20_3e',['is_integral< unsigned long long >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20short_20_3e',['is_integral< unsigned short >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20volatile_20t_20_3e',['is_integral< volatile T >',['../structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fpointer',['is_pointer',['../structcutlass_1_1platform_1_1is__pointer.html',1,'cutlass::platform']]], + ['is_5fpointer_5fhelper',['is_pointer_helper',['../structcutlass_1_1platform_1_1is__pointer__helper.html',1,'cutlass::platform']]], + ['is_5fpointer_5fhelper_3c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e',['is_pointer_helper< remove_cv< T >::type >',['../structcutlass_1_1platform_1_1is__pointer__helper.html',1,'cutlass::platform']]], + ['is_5fpointer_5fhelper_3c_20t_20_2a_20_3e',['is_pointer_helper< T * >',['../structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html',1,'cutlass::platform']]], + ['is_5fpow2',['is_pow2',['../structcutlass_1_1is__pow2.html',1,'cutlass']]], + ['is_5fsame',['is_same',['../structcutlass_1_1platform_1_1is__same.html',1,'cutlass::platform']]], + ['is_5fsame_3c_20a_2c_20a_20_3e',['is_same< A, A >',['../structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html',1,'cutlass::platform']]], + ['is_5fsame_3c_20void_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e',['is_same< void, remove_cv< T >::type >',['../structcutlass_1_1platform_1_1is__same.html',1,'cutlass::platform']]], + ['is_5ftrivially_5fcopyable',['is_trivially_copyable',['../structcutlass_1_1platform_1_1is__trivially__copyable.html',1,'cutlass::platform']]], + 
['is_5fvoid',['is_void',['../structcutlass_1_1platform_1_1is__void.html',1,'cutlass::platform']]], + ['is_5fvolatile',['is_volatile',['../structcutlass_1_1platform_1_1is__volatile.html',1,'cutlass::platform']]], + ['is_5fvolatile_3c_20volatile_20t_20_3e',['is_volatile< volatile T >',['../structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fzero',['is_zero',['../structcutlass_1_1PredicateVector.html#a1c4fe2bec906cd7937428ed6561ac79a',1,'cutlass::PredicateVector::is_zero()'],['../namespacecutlass_1_1gemm.html#a3e30ae89e6f7501725028144cd2d88cb',1,'cutlass::gemm::is_zero(T x)'],['../namespacecutlass_1_1gemm.html#a4a12fcfae60f26efa47bf0a79483d8ac',1,'cutlass::gemm::is_zero(half x)']]], + ['isvector',['IsVector',['../structcutlass_1_1VectorTraits.html#abf96ea5dfd3212d388cb91e48cc0e6a2',1,'cutlass::VectorTraits::IsVector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aead181209c756f25ab5870682670bb99',1,'cutlass::VectorTraits< Vector< T, Lanes > >::IsVector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a893488718d8437970c1b4ed4f4056620',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::IsVector()']]], + 
['iterations',['Iterations',['../structcutlass_1_1FragmentIterator.html#a4324ae522c6463e66a64f05d2e58b5f0',1,'cutlass::FragmentIterator::Iterations()'],['../structcutlass_1_1FragmentConstIterator.html#a527100e34ed700787b1419157710dbb2',1,'cutlass::FragmentConstIterator::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a8e3c978da6ed56239783bf4db0a936ae',1,'cutlass::gemm::GemmEpilogue::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#ab00969bdda930eeb7b82985c476adf7d',1,'cutlass::gemm::GemmEpilogueTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ad7b23352072b1509d3383ee775756d2a',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Iterations()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aaf6410f99d7f995792d0ac34efd3a82f',1,'cutlass::gemm::GemmGlobalTileTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a72eebc18d31900db57fa77508016f64a',1,'cutlass::gemm::GemmGlobalTileCdTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a6125e052e47296c3ef53c8a149ffd31b',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a025445699c5c86237d8c3e48f01081ea',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#ae96e490d38ade6db4d853fb6c8f3378b',1,'cutlass::gemm::GemmSharedLoadTileATraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a27bc06b72a94e34d5da6fbfb950459b5',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a6bacc866485330f80596f634e6d14336',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a81ca35e0c5d9553d1dccc981cbd89d47',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Iterations()'],['../structcutlass_1_1gemm_1_1
HgemmCrosswiseGlobalTileTraits.html#aa9b46937bea47d071d277aa212dd610b',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Iterations()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a0b9b2b7838cb13a61a16501a2662fa51',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Iterations()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a9fb4b56091d4458ebd82130bc3951e5b',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Iterations()'],['../structcutlass_1_1PredicateTileAdapter.html#a1f2d52eec9f488c2a53c4d62af824450',1,'cutlass::PredicateTileAdapter::Iterations()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#a5e461e0eb376de60605a6ab5fdc38058',1,'cutlass::ConstPredicateTileAdapter::Iterations()'],['../structcutlass_1_1TileTraits.html#af7ae2fdb4c8f1702169cc7d437d2b469',1,'cutlass::TileTraits::Iterations()'],['../structcutlass_1_1TileIteratorBase.html#a352ed0773b37f03bf68e4b6cf9899474',1,'cutlass::TileIteratorBase::Iterations()'],['../structcutlass_1_1TileLoadIterator.html#a9720b1e4a10c2d5aa85f9a9c66a31bbf',1,'cutlass::TileLoadIterator::Iterations()'],['../structcutlass_1_1TileStoreIterator.html#a552a67fb03c28e985d143f6193f88308',1,'cutlass::TileStoreIterator::Iterations()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a03a32694da75bb95422c6b550e3324e2',1,'cutlass::TileTraitsStrideMajor::Iterations()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a425a20b642ae8736c12626b2de9b8b82',1,'cutlass::TileTraitsContiguousMajor::Iterations()'],['../structcutlass_1_1TileTraitsWarpRake.html#a410e44aa83f2179152a48f7aceb05323',1,'cutlass::TileTraitsWarpRake::Iterations()']]], + ['iterationsstrides',['IterationsStrides',['../structcutlass_1_1FragmentConstIterator.html#ab683796885f3bae3765efd96883f311b',1,'cutlass::FragmentConstIterator']]], + ['iterator',['Iterator',['../classcutlass_1_1PredicateVector_1_1Iterator.html',1,'cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ 
>::Iterator'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a6925270c4ad157554ab155cddc7b46e6',1,'cutlass::gemm::SharedLoadStream::Iterator()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a91b7d25cbd64e696ef23c87671f0b077',1,'cutlass::PredicateVector::Iterator::Iterator(Iterator const &it)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a83c2f584bd061f0b9b6b2a6cddf5b038',1,'cutlass::PredicateVector::Iterator::Iterator(PredicateVector &_vec, int _start=0)'],['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html#ae59f871c06a0ac7b9224f0de923082d7',1,'cutlass::gemm::SharedLoadStream::Params::iterator()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a54481a42d4125e3693a086269d9a7b10',1,'cutlass::gemm::SharedLoadStream::iterator()']]], + ['iterator_5faccess_2eh',['iterator_access.h',['../iterator__access_8h.html',1,'']]], + ['iterator_5fc',['iterator_c',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a7350ceefcd09a9e3662ca30b780cc2ce',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['iterator_5fd',['iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a987c179a7e73c2572fe8aef3255668f7',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['iterator_5fload',['iterator_load',['../namespacecutlass.html#a45dd7add04736cb5c3e69991d2f210be',1,'cutlass::iterator_load(InputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#a50f08aa93d7fe6825599d17e3c977031',1,'cutlass::iterator_load(InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#aca491136bdb966638a7ae57c47f86d1e',1,'cutlass::iterator_load(InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index offset=0)'],['../namespacecutlass.html#af25d56f7391322d9a3b9aa3c507f90dc',1,'cutlass::iterator_load(InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)']]], + 
['iterator_5fload_5fpost_5fincrement',['iterator_load_post_increment',['../namespacecutlass.html#a3965068d8a4fdfe5e05782930fb4fe6b',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#af5abe551df7461eab66aa43907063d6b',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0)'],['../namespacecutlass.html#afb8e7a4e611e8b5ae7ca19d02f791d37',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)']]], + ['iterator_5fstore',['iterator_store',['../namespacecutlass.html#a0cb5bdf7bef498705c51a9cdcbef71f9',1,'cutlass::iterator_store(OutputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#a88dce4b124a294cc123f7cf5fd2d6472',1,'cutlass::iterator_store(OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#a410ed4d45ccafc2db842967740b6211f',1,'cutlass::iterator_store(OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)'],['../namespacecutlass.html#ad804b804ac19360b293046f9cbfd8dd5',1,'cutlass::iterator_store(OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)']]], + ['iterator_5fstore_5fpost_5fincrement',['iterator_store_post_increment',['../namespacecutlass.html#a5bf15cbf4cf4649d895fcbc2edf6a2de',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#ab8efb0edefca7a59acc5a14b7311130c',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index 
offset=0)'],['../namespacecutlass.html#a96fdb65e922f6a3d46aa5de9ea78d460',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)']]], + ['iteratoradvance',['IteratorAdvance',['../structcutlass_1_1IteratorAdvance.html',1,'cutlass']]], + ['iteratorfragment',['IteratorFragment',['../structcutlass_1_1IteratorFragment.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/all_a.html b/docs/generated-html/search/all_a.html new file mode 100644 index 00000000..3148a8e5 --- /dev/null +++ b/docs/generated-html/search/all_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_a.js b/docs/generated-html/search/all_a.js new file mode 100644 index 00000000..fec5dfcc --- /dev/null +++ b/docs/generated-html/search/all_a.js @@ -0,0 +1,76 @@ +var searchData= +[ + ['k',['k',['../structcutlass_1_1gemm_1_1GemmDesc.html#ac789a7e5d2db65d006f1e8e3df542a6f',1,'cutlass::gemm::GemmDesc::k()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aae3a008b39f9678a03192f6ff54152d8',1,'cutlass::gemm::GemmTraits::Params::k()']]], + ['ka',['kA',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0cac2b9fe9e3679a059d1a6c946b2a2c31a',1,'cutlass::GemmOperand']]], + ['kaccesssize',['kAccessSize',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aa001e09b246fdd8259cbda6a500cad5f',1,'cutlass::gemm::GemmGlobalTileTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ae852c89da0455025c0c41af258e47047',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a846e6d8d06be0ba6fa41b1431c8ec061',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a0a33d4289ed45e988d560b5f73ac997e',1,'cutlass::gemm::GemmSharedLoadTileATraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#aa41cc5dc82fe08457d103545f8f63081',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a9521c4017e227b2511891a7fb18513e1',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8d308d593b59624abe3e228d588be61d',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kAccessSize()'],['../structcutlass_1_1TileIteratorBase.html#aef07ba456ea016092d7d2446751b76a3',1,'cutlass::TileIteratorBase::kAccessSize()']]], + 
['kaccumulatorsperldsa',['kAccumulatorsPerLdsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#abbdd356f280099269867e614684645cf',1,'cutlass::gemm::GemmConfig']]], + ['kaccumulatorsperldsb',['kAccumulatorsPerLdsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a9dd092bca2f1f2c039f367b23bafa9c1',1,'cutlass::gemm::GemmConfig']]], + ['kadvance',['kAdvance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a8c1e871f17685b16a7a41fcc888f0125',1,'cutlass::gemm::GemmGlobalIteratorAb::kAdvance()'],['../structcutlass_1_1TileIteratorBase.html#ac1a64e974dcd69c3a86a31db6cbff421',1,'cutlass::TileIteratorBase::kAdvance()'],['../structcutlass_1_1TileLoadIterator.html#a69d2f21c8188fb3229af8c2dbe0a23b6',1,'cutlass::TileLoadIterator::kAdvance()'],['../structcutlass_1_1TileStoreIterator.html#a8059c57030df99b73309e9210ec5f624',1,'cutlass::TileStoreIterator::kAdvance()']]], + ['kb',['kB',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0caad0876342d150cef7da6ae149d5e99f9',1,'cutlass::GemmOperand']]], + ['kbytes',['kBytes',['../structcutlass_1_1PredicateVector.html#ab870e074b33c598f69fe11e104615c5a',1,'cutlass::PredicateVector']]], + ['kc',['kC',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0ca7598e104da2001a76ec344f1c1b9c6dc',1,'cutlass::GemmOperand::kC()'],['../structcutlass_1_1Shape.html#a3f2433fd6401dd28f1130499f9fd340c',1,'cutlass::Shape::kC()']]], + ['kcolumnmajor',['kColumnMajor',['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2bac15988acba79c11072d38b295f163a2b',1,'cutlass::MatrixLayout']]], + ['kcount',['kCount',['../structcutlass_1_1ShapeCount.html#a8d25b48b3294b5563f89c62a6e6d00e5',1,'cutlass::ShapeCount']]], + 
['kd',['kD',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0ca49eef82461e44c96462f9c4dbaab71fe',1,'cutlass::GemmOperand::kD()'],['../structcutlass_1_1Shape.html#a19086a5567d6c710ec853e35a7f29c25',1,'cutlass::Shape::kD()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaa56ecb02f4ed3bd7ae4a9c971805ee8c5',1,'cutlass::IteratorAdvance::kD()']]], + ['kdhw',['kDhw',['../structcutlass_1_1ShapeCount.html#af7d7ccd42de2c49fe57f03cf0e657fe8',1,'cutlass::ShapeCount']]], + ['kdhwc',['kDhwc',['../structcutlass_1_1ShapeCount.html#a5a274564d6b8607a0be621b2664fba18',1,'cutlass::ShapeCount']]], + ['kelements',['kElements',['../structcutlass_1_1Fragment.html#a2b9a64391d00ef23dd8d456c2337fa60',1,'cutlass::Fragment']]], + ['kelementsperaccess',['kElementsPerAccess',['../structcutlass_1_1FragmentIterator.html#ad2c43e30e78e8799df7cb02ac08cee9a',1,'cutlass::FragmentIterator::kElementsPerAccess()'],['../structcutlass_1_1FragmentConstIterator.html#a004fabc9caa6924f3fb4badcbb19e88f',1,'cutlass::FragmentConstIterator::kElementsPerAccess()']]], + ['kextent',['kExtent',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html#a881f84951bc9e47ab2be9ef3f2c1e423',1,'cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >::kExtent()'],['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html#a82ff9b447e4a58164b5f7d53d2602930',1,'cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >::kExtent()']]], + ['kfragmentsize',['kFragmentSize',['../structcutlass_1_1TileIteratorBase.html#a4e0b2bc06bb8f52313e4d8c51ab30ff2',1,'cutlass::TileIteratorBase']]], + ['kgeneric',['kGeneric',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03ca21a44c0b78017acea0d1ffe223e5ca38',1,'cutlass::MemorySpace']]], + ['kglobal',['kGlobal',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03cac4bd4070cc396d698beb7ca2e3bbff37',1,'cutlass::MemorySpace']]], + 
['kh',['kH',['../structcutlass_1_1Shape.html#a3a20d9062bba613c160bb2cd14f80a5e',1,'cutlass::Shape::kH()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaacfe756fca665eb1bbf389850915c1b81',1,'cutlass::IteratorAdvance::kH()']]], + ['khw',['kHw',['../structcutlass_1_1ShapeCount.html#afc957be69eb78e4849ba8ab3cc66583f',1,'cutlass::ShapeCount']]], + ['khwc',['kHwc',['../structcutlass_1_1ShapeCount.html#a75324e2c9d31a0787343fc994586b742',1,'cutlass::ShapeCount']]], + ['kind',['Kind',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375',1,'cutlass::Identity::Kind()'],['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03c',1,'cutlass::MemorySpace::Kind()'],['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2b',1,'cutlass::MatrixLayout::Kind()'],['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0c',1,'cutlass::GemmOperand::Kind()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738dda',1,'cutlass::IteratorAdvance::Kind()'],['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80',1,'cutlass::IteratorFragment::Kind()']]], + ['kint8output',['kInt8Output',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html#a8609af98d1e43cd25688bae6f33feed4',1,'cutlass::gemm::IgemmEpilogueTraits']]], + ['kiterationsd',['kIterationsD',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8663311646210b690bb0c2a1012e82f0',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + ['kiterationsh',['kIterationsH',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a3b1a461c1dfbcd3817ab2d57bd0da9f1',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + ['kiterationsinhperwarp',['kIterationsInHPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a4b8d66df02ba1653aa6d1f23b967f237',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + 
['kiteratorfragment',['kIteratorFragment',['../structcutlass_1_1TileIteratorBase.html#a38c8ec1e9d0117172981b4c7dd4bf3be',1,'cutlass::TileIteratorBase::kIteratorFragment()'],['../structcutlass_1_1TileLoadIterator.html#aba1d75a0cd5f11dee2aecf89b2b13d98',1,'cutlass::TileLoadIterator::kIteratorFragment()'],['../structcutlass_1_1TileStoreIterator.html#a94c0567316118abfb84fc28560a5a46a',1,'cutlass::TileStoreIterator::kIteratorFragment()']]], + ['kkstrided',['kKstrided',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a1984c9ef6abfd029acbc3f702593ab85',1,'cutlass::gemm::GemmMultiplicandTraits']]], + ['klanes',['kLanes',['../unioncutlass_1_1Vector.html#a824f9ab976c8e7f035236af03e5ae839a605c5e987bc7b08d743f29a6524abb27',1,'cutlass::Vector::kLanes()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#aa70d2fd36f00b63d321c1f7b6d6c3024ad242b575673ca1bf9cf311e58a966392',1,'cutlass::Vector< half, kLanes_ >::kLanes()'],['../structcutlass_1_1VectorTraits.html#a052e1e5963a9e04482b16cb881d1eaf8',1,'cutlass::VectorTraits::kLanes()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aca745b59c6c21292f119943e5a480f39',1,'cutlass::VectorTraits< Vector< T, Lanes > >::kLanes()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a43ac200035052a2c352c8c4b84aac73c',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::kLanes()']]], + 
['klayout',['kLayout',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a807cffc6f69f8d30a2fc94cf49fb904c',1,'cutlass::gemm::GlobalLoadStreamBase::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a74bc07cb021a73513ab2fbacd572be90',1,'cutlass::gemm::GemmGlobalTileTraits::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#afe016e0c6234075a8d69ba7341555ece',1,'cutlass::gemm::GemmGlobalIteratorAb::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a27b88818f5b094372bf2c6e090c9148a',1,'cutlass::gemm::GemmGlobalIteratorCd::kLayout()'],['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a19076e58e60d296da74cf504e2a473fd',1,'cutlass::gemm::GemmMultiplicandTraits::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ad2010686bceb21aec9a1924ae379edc1',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aedd49525e2c849baecf88cdfd9e3515c',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#afbd350793888a7e7b299548dca854c13',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a31fa28168811e2d04fbd74029df785ab',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ae0f176733ba9dee0cce45435ac5d53ba',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::kLayout()']]], + ['klayouta',['kLayoutA',['../structcutlass_1_1gemm_1_1GemmTraits.html#ac5bb5931a707ed7672f69267753ba41b',1,'cutlass::gemm::GemmTraits']]], 
+ ['klayoutb',['kLayoutB',['../structcutlass_1_1gemm_1_1GemmTraits.html#a078e8d9cfa1b182e1b96a2cc8c54b684',1,'cutlass::gemm::GemmTraits']]], + ['kmemoryspace',['kMemorySpace',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#af219ece6e66e2866169e06e15cc4472d',1,'cutlass::gemm::GemmGlobalTileTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a59c981aa720f983b846bed7c3e4a7cab',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#ae5a07814b9cfe9a64f69bac0f0772f20',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a4456e4c8048bfb378e5b80833a0d19e5',1,'cutlass::gemm::GemmSharedLoadTileATraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a7007093a4abf79a0b4bfb3fc85a02620',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a8914bc5154f21fa5fd182b0009c44c39',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#afb4687520eff9c6a21c35a5e04f69de8',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kMemorySpace()'],['../structcutlass_1_1TileIteratorBase.html#a871c9b82109eab432c5a1d465643bf97',1,'cutlass::TileIteratorBase::kMemorySpace()'],['../structcutlass_1_1TileLoadIterator.html#ac21bd78b31c99c826f0eddb5aa033bf1',1,'cutlass::TileLoadIterator::kMemorySpace()'],['../structcutlass_1_1TileStoreIterator.html#adaebec9eacf767f63f048033de73ea5b',1,'cutlass::TileStoreIterator::kMemorySpace()']]], + 
['koperand',['kOperand',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#ae0bca976b7cfba8561db4cccc16e99e1',1,'cutlass::gemm::GemmGlobalTileTraits::kOperand()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#af511bba9fc2125516eb1442b1c88d851',1,'cutlass::gemm::GemmSharedLoadTileATraits::kOperand()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#afd4881aae69c8041d3931982d85f44e4',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kOperand()']]], + ['kpredicates',['kPredicates',['../structcutlass_1_1PredicateVector.html#afff3a2142d9853606d6ad7c3a459f492',1,'cutlass::PredicateVector']]], + ['kpredicatesperbyte',['kPredicatesPerByte',['../structcutlass_1_1PredicateVector.html#a1387c4a964f971ed4611d750a09ec0b5',1,'cutlass::PredicateVector']]], + ['kpredicatestart',['kPredicateStart',['../structcutlass_1_1PredicateVector.html#acf848dce84c01453ab8a2d00c8d4f86e',1,'cutlass::PredicateVector']]], + ['krequiresloadfence',['kRequiresLoadFence',['../structcutlass_1_1TileLoadIterator.html#a1f3601c595f12e7083919ece9b1ec84eaee9d9d6cea8079c32c9383bde45161fc',1,'cutlass::TileLoadIterator']]], + ['krowmajor',['kRowMajor',['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2ba6a287c17f9f5bf53528ae68296beeedb',1,'cutlass::MatrixLayout']]], + ['kscalar',['kScalar',['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80aeca44a186befa21ccae44eb4dc7b6954',1,'cutlass::IteratorFragment']]], + ['kscalarsin4b',['kScalarsIn4B',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ad77b9084720ad7378e033e54bfb74ce7',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsIn4B()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a774a052f0f98f50e46dda933c81badd5',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsIn4B()']]], + 
['kscalarsperldga',['kScalarsPerLdgA',['../structcutlass_1_1gemm_1_1GemmConfig.html#a2e0a043c5d4d7959ec1a2214c3ac39ac',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldgb',['kScalarsPerLdgB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a849b21fed39aaac1cdd546334739be97',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldgc',['kScalarsPerLdgC',['../structcutlass_1_1gemm_1_1GemmConfig.html#aad47c635a73e83bd4b19494864832d31',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldsa',['kScalarsPerLdsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#aa1b75484138923a52b32888fef608d9b',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldsb',['kScalarsPerLdsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a86470d3a44e2b50ee31ec3c9f79927ef',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldsd',['kScalarsPerLdsD',['../structcutlass_1_1gemm_1_1GemmConfig.html#adaf2ee5b8e6f7bdb9939cd45a186ca56',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperrow',['kScalarsPerRow',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#af1c981ec89a9cabaf5d34231d51a029c',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kScalarsPerRow()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#aa3e378cabce9ed7f199c179c15a12ca4',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kScalarsPerRow()']]], + ['kscalarsperstgd',['kScalarsPerStgD',['../structcutlass_1_1gemm_1_1GemmConfig.html#a3633083f4f778215543e376c092745d7',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperstsa',['kScalarsPerStsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#accc95abc55880abdab92253367b4b186',1,'cutlass::gemm::GemmConfig::kScalarsPerStsA()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ae396f7301f934c179e054f68f0420edf',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsPerStsA()']]], + 
['kscalarsperstsb',['kScalarsPerStsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#ac0c8c027e3ede14b62d7c7d519551f21',1,'cutlass::gemm::GemmConfig::kScalarsPerStsB()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a47d99d98c783cf1d317698bd465ffa9a',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsPerStsB()']]], + ['kscalarsperstsd',['kScalarsPerStsD',['../structcutlass_1_1gemm_1_1GemmConfig.html#a3087cdd38e2c65ad0dffdd0587d2cce0',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperthread',['kScalarsPerThread',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ae0b53d76096f9d34df6e16280565c7b1',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kScalarsPerThread()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#abb5fdb164b09c8f74f92278f3d68b95f',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kScalarsPerThread()']]], + ['kshared',['kShared',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03ca2804339b2be64ff68ae3042073aaa7cc',1,'cutlass::MemorySpace']]], + 
['kskew',['kSkew',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ace14ca9ad11e2cdafcd4a4b63c0df591',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#aba6decf87d770becaadd610d9fc27491',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#aaffe67e519e919bf561142e05da6e6c8',1,'cutlass::gemm::GemmSharedLoadTileATraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ac9cd90ecd02809060a2fe6e2da4210f9',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a48baee6541e6359753f1bae5bd864029',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a7e9ce187e12575f0ecd39b2bfe13dddf',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kSkew()']]], + ['kstages',['kStages',['../structcutlass_1_1gemm_1_1GemmConfig.html#a221949c289057e39d439ce03a5b01c52',1,'cutlass::gemm::GemmConfig']]], + ['kstrideh',['kStrideH',['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a87918f4d67a9c1e19dcd3c6bfc243e97',1,'cutlass::gemm::GemmGlobalTileCdTraits']]], + 
['kthreads',['kThreads',['../structcutlass_1_1gemm_1_1Gemm.html#a41239809be4ebc730dd8ff28c9efc58b',1,'cutlass::gemm::Gemm::kThreads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a05039ba8b7d9890903064b1a834dcd3e',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kThreads()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8325bc9d56155ecb6f2ddbd56f4ed23d',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kThreads()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a0b2be601de08848afc4418adb97255bf',1,'cutlass::gemm::GemmConfig::kThreads()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a2b6ad449269a178018f02b8cc64ddb85',1,'cutlass::TileTraitsStrideMajor::kThreads()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a53d10552356855bf7379632e72bbe0c9',1,'cutlass::TileTraitsContiguousMajor::kThreads()'],['../structcutlass_1_1TileTraitsWarpRake.html#a11d943e15e397cbc5233b09071dff642',1,'cutlass::TileTraitsWarpRake::kThreads()'],['../structcutlass_1_1TileTraitsStandard.html#a9cbcbe09aa6e9465b63dd22d59435af1',1,'cutlass::TileTraitsStandard::kThreads()']]], + ['kthreadsperwarp',['kThreadsPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a4246185b8279f245ef5d0650c1eec14f',1,'cutlass::gemm::GemmSharedLoadTileATraits::kThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a049b0bcdf8c5318ee84edeb1e42eaf78',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kThreadsPerWarp()']]], + ['kusage',['kUsage',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a962ffde3b3db78792b67dd1f57ab0a05',1,'cutlass::gemm::GemmMultiplicandTraits']]], + ['kvalue',['kValue',['../structcutlass_1_1Extent.html#a2cb62986b9a7c168bf79b083f33c4bad',1,'cutlass::Extent::kValue()'],['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#a10f7184a9a50de0268efa45dab5dc304',1,'cutlass::Extent< Vector< T, Lanes > 
>::kValue()'],['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a87917a6dfbb1662416c4ea4831669aaf',1,'cutlass::Extent< Vector< T, Lanes > const >::kValue()']]], + ['kvectorsize',['kVectorSize',['../unioncutlass_1_1Vector.html#abf0c16b6f9cb8439835ebdb271d58763afaf4b62c6bcafbf961c5570364a0316e',1,'cutlass::Vector::kVectorSize()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#adc4140a7e40be1e4f81c78a657c7ba73abfbb3cf98db2f8af7150efb91cac4e79',1,'cutlass::Vector< half, kLanes_ >::kVectorSize()']]], + ['kw',['kW',['../structcutlass_1_1Shape.html#a78836a20250ff24c25a6622ad818b421',1,'cutlass::Shape::kW()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaa567e61af8a3401d302f3a3ab26418df0',1,'cutlass::IteratorAdvance::kW()']]], + ['kwarpcount',['kWarpCount',['../structcutlass_1_1TileTraitsWarpRake.html#a7a03abe44862077351b0a0a2818d214d',1,'cutlass::TileTraitsWarpRake::kWarpCount()'],['../structcutlass_1_1TileTraitsStandard.html#a1e8f90991e179d13971b84494c989d25',1,'cutlass::TileTraitsStandard::kWarpCount()']]], + ['kwarps',['kWarps',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#af78a275086a297bd93aed920f57a17be',1,'cutlass::gemm::GemmSharedLoadTileATraits::kWarps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a8b8d6a26a29d5477f526d9ce8c27e3e2',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kWarps()']]], + ['kwarpscontiguous',['kWarpsContiguous',['../structcutlass_1_1TileTraitsWarpRake.html#aede0832e95df911b1e6e3f1cc9e593ce',1,'cutlass::TileTraitsWarpRake']]], + ['kwarpsize',['kWarpSize',['../structcutlass_1_1gemm_1_1GemmConfig.html#a677d6a1711cc756b817095b7437cce0e',1,'cutlass::gemm::GemmConfig::kWarpSize()'],['../structcutlass_1_1TileTraitsWarpRake.html#ad25fb7c1b5dc8c5828a69e5a468f490b',1,'cutlass::TileTraitsWarpRake::kWarpSize()'],['../structcutlass_1_1TileTraitsStandard.html#ae9f40eb177c440f01adcc2fe9ca7ec10',1,'cutlass::TileTraitsStandard::kWarpSize()']]], + 
['kwarpsstrided',['kWarpsStrided',['../structcutlass_1_1TileTraitsWarpRake.html#a8b1d3fe590f426ce11d597bb98c51bd4',1,'cutlass::TileTraitsWarpRake']]], + ['kwc',['kWc',['../structcutlass_1_1ShapeCount.html#aac5c49469aa80d119c2006291b431276',1,'cutlass::ShapeCount']]], + ['kwmmamatrix',['kWmmaMatrix',['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80a21d2b2793bab0d348df40715b8f14419',1,'cutlass::IteratorFragment']]], + ['kwordcount',['kWordCount',['../structcutlass_1_1PredicateVector.html#a734bbfaf3829f73ef0b44fa7db4ccd42',1,'cutlass::PredicateVector']]] +]; diff --git a/docs/generated-html/search/all_b.html b/docs/generated-html/search/all_b.html new file mode 100644 index 00000000..f2a3c8d0 --- /dev/null +++ b/docs/generated-html/search/all_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_b.js b/docs/generated-html/search/all_b.js new file mode 100644 index 00000000..d156678a --- /dev/null +++ b/docs/generated-html/search/all_b.js @@ -0,0 +1,27 @@ +var searchData= +[ + ['launch',['launch',['../structcutlass_1_1gemm_1_1Gemm.html#a77ae137aec79b4061a9ffa09aabf641c',1,'cutlass::gemm::Gemm::launch(Params const &params, cudaStream_t stream=cudaStreamDefault)'],['../structcutlass_1_1gemm_1_1Gemm.html#a4f4122a2ae8b9b09a9660e5c2ca9e906',1,'cutlass::gemm::Gemm::launch(CUfunction kernel, Params const &params, CUstream stream=CU_STREAM_LEGACY)']]], + ['layout_20concept',['Layout Concept',['../group__layout__concept.html',1,'']]], + ['lcm',['lcm',['../namespacecutlass.html#af07506fee11de882d926f4e8237eef09',1,'cutlass']]], + ['lda',['lda',['../structcutlass_1_1gemm_1_1GemmDesc.html#a62ad30ba419ccb661e6700da98221789',1,'cutlass::gemm::GemmDesc']]], + ['ldb',['ldb',['../structcutlass_1_1gemm_1_1GemmDesc.html#a7591ce0223b0d05c4d6fca6c67b98bfe',1,'cutlass::gemm::GemmDesc']]], + ['ldc',['ldc',['../structcutlass_1_1gemm_1_1GemmDesc.html#a0f492560cabc45cd492da65b819d09db',1,'cutlass::gemm::GemmDesc']]], + ['ldd',['ldd',['../structcutlass_1_1gemm_1_1GemmDesc.html#a3280e5c5484f5c10d1412bcb70eb77e9',1,'cutlass::gemm::GemmDesc']]], + ['leading_5fdim',['leading_dim',['../classcutlass_1_1TensorRef.html#a8e1c61910ffb49ec64930f66dd342b77',1,'cutlass::TensorRef']]], + ['less',['less',['../structcutlass_1_1platform_1_1less.html',1,'cutlass::platform']]], + ['linear_5fscaling_2eh',['linear_scaling.h',['../linear__scaling_8h.html',1,'']]], + ['linearscaling',['LinearScaling',['../structcutlass_1_1gemm_1_1LinearScaling.html',1,'cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >'],['../structcutlass_1_1gemm_1_1LinearScaling.html#a34df6970f033b3090ad8f4d40063b1b2',1,'cutlass::gemm::LinearScaling::LinearScaling()']]], + ['load',['Load',['../structcutlass_1_1Load.html',1,'cutlass::Load< Scalar_, Lanes_, Memory_, bool, 
size_t >'],['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html#aea5ed35a44624684ffa9ada9d09a8893',1,'cutlass::gemm::GemmEpilogueTraits::StreamSharedStorage::load()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html#a01a847858cb330d7d109ddee228e96ce',1,'cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html#a014682b143bce65667075ea15fad184d',1,'cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load()'],['../structcutlass_1_1Load.html#ad033ebc1452d96b18913333bf7068140',1,'cutlass::Load::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#aa9d5e227ea20ad3c6952f296016ec167',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a0e58d26dd68aabb6cb9678f5656c7e6f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::load()'],['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#a7ba77016bee8e941f7831cc9fbfa994d',1,'cutlass::Load< double, 2, Memory_, true, 16 >::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#a4ee00178c441bdf4d4a1f8cf984bc03f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::load()'],['../structcutlass_1_1TileLoadIterator.html#a9c4b332857f419e6f789a93404dc2140',1,'cutlass::TileLoadIterator::load(Fragment &fragment, PredicateIterator pred_it) const'],['../structcutlass_1_1TileLoadIterator.html#a1058cdec33393db9c16b28c21d8957db',1,'cutlass::TileLoadIterator::load(Fragment &fragment) const']]], + 
['load_3c_20double_2c_202_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Load< double, 2, Memory_, true, 16 >',['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 16 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_204_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 4 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_208_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 8 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html',1,'cutlass']]], + ['load_5fiterator',['load_iterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a42ffcba6af2b5ddfb1f4825a34d43532',1,'cutlass::gemm::GlobalLoadStreamBase::Params::load_iterator()'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html#a3be938f8661f9cd10966866b7b80b471',1,'cutlass::gemm::GlobalLoadStreamBase::SharedStorage::load_iterator()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#ad2381f2311ee8400a2dc57c19084ef5e',1,'cutlass::gemm::GlobalLoadStreamBase::load_iterator()']]], + ['load_5fpost_5fincrement',['load_post_increment',['../structcutlass_1_1TileLoadIterator.html#a2716b9010d2902b90e63abb0531ee915',1,'cutlass::TileLoadIterator::load_post_increment(Fragment &fragment, PredicateIterator pred_it)'],['../structcutlass_1_1TileLoadIterator.html#a195993d58ae0eeb53203116ac02ab38d',1,'cutlass::TileLoadIterator::load_post_increment(Fragment &fragment)']]], + ['load_5fstore_2eh',['load_store.h',['../load__store_8h.html',1,'']]], + 
['loaditerator',['LoadIterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#acff2a1ab180eec672714cd587a28f9fe',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['log2_5fdown',['log2_down',['../structcutlass_1_1log2__down.html',1,'cutlass']]], + ['log2_5fdown_3c_20n_2c_201_2c_20count_20_3e',['log2_down< N, 1, Count >',['../structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html',1,'cutlass']]], + ['log2_5fup',['log2_up',['../structcutlass_1_1log2__up.html',1,'cutlass']]], + ['log2_5fup_3c_20n_2c_201_2c_20count_20_3e',['log2_up< N, 1, Count >',['../structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/all_c.html b/docs/generated-html/search/all_c.html new file mode 100644 index 00000000..63768107 --- /dev/null +++ b/docs/generated-html/search/all_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_c.js b/docs/generated-html/search/all_c.js new file mode 100644 index 00000000..7420bb3e --- /dev/null +++ b/docs/generated-html/search/all_c.js @@ -0,0 +1,20 @@ +var searchData= +[ + ['m',['m',['../structcutlass_1_1gemm_1_1GemmDesc.html#a5c2b3e75cb6873762ba3f85487b78579',1,'cutlass::gemm::GemmDesc::m()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac344bf5ca318dc343bd6fa6bf52d2e22',1,'cutlass::gemm::GemmEpilogue::m()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aaf27c0f2f4ab730ed5c865e9f7d2373b',1,'cutlass::gemm::GemmTraits::Params::m()']]], + ['main_5floop',['main_loop',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html#aa5dd7edc3cffa785eb1e5b62c18c74c4',1,'cutlass::gemm::GemmTraits::SharedStorage']]], + ['mainloopsharedstorage',['MainLoopSharedStorage',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html',1,'cutlass::gemm::GemmTraits']]], + ['make_5fcoord',['make_Coord',['../namespacecutlass.html#a7419519fa453a121dfa5f26bf87318d9',1,'cutlass::make_Coord(int _0)'],['../namespacecutlass.html#a61d81e5363bcb8a7f6dd70f053242564',1,'cutlass::make_Coord(int _0, int _1)'],['../namespacecutlass.html#a25acf680a7d2592c957a7ac603f4c361',1,'cutlass::make_Coord(int _0, int _1, int _2)'],['../namespacecutlass.html#a9410b1f5956d3aaf4584e65d047428fc',1,'cutlass::make_Coord(int _0, int _1, int _2, int _3)']]], + ['make_5fpair',['make_pair',['../namespacecutlass_1_1platform.html#a90ce74c7faa4e27c888ce56e957b73d5',1,'cutlass::platform']]], + ['make_5fzero',['make_zero',['../namespacecutlass.html#acdb62db582cf90cfd437fc56f4ca7bbf',1,'cutlass::make_zero(Scalar_ &x)'],['../namespacecutlass.html#abc5c00b4986db5a114e774cee9999717',1,'cutlass::make_zero(Vector< Scalar_, kLanes_ > &vec)']]], + ['matrix_5ftraits_2eh',['matrix_traits.h',['../matrix__traits_8h.html',1,'']]], + ['matrixlayout',['MatrixLayout',['../structcutlass_1_1MatrixLayout.html',1,'cutlass']]], + 
['max',['max',['../namespacecutlass_1_1platform.html#af6a9a165e53d7e85ae121d5789aa03e0',1,'cutlass::platform']]], + ['memoryspace',['MemorySpace',['../structcutlass_1_1MemorySpace.html',1,'cutlass']]], + ['min',['min',['../namespacecutlass_1_1platform.html#a57c071d2a7305dd4ec60542e66b0c81c',1,'cutlass::platform']]], + ['multiplicandtraits',['MultiplicandTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a21a3524edaf002b5e5878df3c7eae7e7',1,'cutlass::gemm::GemmGlobalTileTraits']]], + ['multiplicative',['Multiplicative',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375af0cc1d8a713958a86af1063595604597',1,'cutlass::Identity']]], + ['multiply',['multiply',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a522301fbe3e276cb5ef9fbe75bb2ab50',1,'cutlass::gemm::FragmentMultiplyAdd::multiply()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#ae62d61ec068ac958753d0a2f5a99d8e2',1,'cutlass::gemm::FragmentMultiplyAdd< half >::multiply()']]], + ['multiply_5fadd',['multiply_add',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a34bbf209967fef6181d3d46dd27fa0c0',1,'cutlass::gemm::FragmentMultiplyAdd::multiply_add()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a89c8b663af69f13c2a02cb464b5172a5',1,'cutlass::gemm::FragmentMultiplyAdd< half >::multiply_add()'],['../structcutlass_1_1gemm_1_1Gemm.html#a2e844037d2527b842de3590cb783a49f',1,'cutlass::gemm::Gemm::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a66486d38349fa20eb065ae9542eb43aa',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#ad22dd143c304c22c2630aedbfd3459af',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, 
ThreadsPerWarp_, int8_t, int8_t, int >::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a5dcf66c8126ec8adf8e66d4bf5b2f347',1,'cutlass::gemm::ThreadMultiplyAdd::multiply_add()']]], + ['multiplyadd',['MultiplyAdd',['../structcutlass_1_1gemm_1_1GemmConfig.html#a8669096ddbb8c810fb8d2313d62e6ee7',1,'cutlass::gemm::GemmConfig::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#af810544e956b04830c5be7ce41d3b45c',1,'cutlass::gemm::GemmTraits::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ae9facf63912d98e597883bf7efb56cc8',1,'cutlass::gemm::HgemmTraitsHelper::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a87e34d56fa955670331749724bee9fd8',1,'cutlass::gemm::IgemmTraitsHelper::MultiplyAdd()']]], + ['multiplyaddscalar',['MultiplyAddScalar',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a19fb8c9b9a77aebec507635de7da6f21',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#afac6f7a62b24396ea6861e6fd10779cc',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a42dd312d4cf5bb53b472389897f9deeb',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aad14588b1515e37ede24915f589d32ab',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar()']]] +]; diff --git a/docs/generated-html/search/all_d.html b/docs/generated-html/search/all_d.html new file mode 100644 index 00000000..cc52c79f --- /dev/null +++ 
b/docs/generated-html/search/all_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_d.js b/docs/generated-html/search/all_d.js new file mode 100644 index 00000000..7c84844e --- /dev/null +++ b/docs/generated-html/search/all_d.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['n',['N',['../structcutlass_1_1Coord.html#a3f2f5a9d7ef2063456c4d9f7e57e71ca',1,'cutlass::Coord::N()'],['../structcutlass_1_1gemm_1_1GemmDesc.html#acee9727aa6cb612a25cd6ced4829061a',1,'cutlass::gemm::GemmDesc::n()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9cc371cd2f1a9485583afdacbb7403ea',1,'cutlass::gemm::GemmEpilogue::n()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a437d4b6f1f149849c5ae635a5993e7ac',1,'cutlass::gemm::GemmTraits::Params::n()']]], + ['no',['no',['../structcutlass_1_1platform_1_1is__base__of__helper.html#ae096aa6c67f60d8d9c5a4b084118a8af',1,'cutlass::platform::is_base_of_helper']]], + ['noexcept',['noexcept',['../platform_8h.html#a189faadd7f99f6c354db09acbb2aafcd',1,'platform.h']]], + ['nullptr',['nullptr',['../platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936',1,'platform.h']]], + ['nullptr_5ft',['nullptr_t',['../structcutlass_1_1platform_1_1nullptr__t.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/all_e.html b/docs/generated-html/search/all_e.html new file mode 100644 index 00000000..85b39bd4 --- /dev/null +++ b/docs/generated-html/search/all_e.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_e.js b/docs/generated-html/search/all_e.js new file mode 100644 index 00000000..49c17a92 --- /dev/null +++ b/docs/generated-html/search/all_e.js @@ -0,0 +1,34 @@ +var searchData= +[ + ['offset',['offset',['../classcutlass_1_1TensorRef.html#a02ee5d16ed4ce4705a99bb16b2ae1ae8',1,'cutlass::TensorRef::offset()'],['../classcutlass_1_1TensorView.html#a064f3630e69798e7915f910c4ee99ab7',1,'cutlass::TensorView::offset()']]], + ['offset_5ft',['Offset_t',['../classcutlass_1_1TensorView.html#a215946fb080a5253815feb1f639c8f6f',1,'cutlass::TensorView']]], + ['operator_20_26_3d',['operator &=',['../structcutlass_1_1PredicateVector.html#a3dd9aeba8f3cbe7a8198d68d91a0bbb9',1,'cutlass::PredicateVector']]], + ['operator_20b_2a',['operator B*',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html#a8d100273203db9018dffbbe84e0b6c76',1,'cutlass::platform::is_base_of_helper::dummy']]], + ['operator_20bool',['operator bool',['../classcutlass_1_1platform_1_1unique__ptr.html#a5791650488ae864f10ad04bec4a31005',1,'cutlass::platform::unique_ptr']]], + ['operator_20d_2a',['operator D*',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html#a8aadc500baf1492b1a4d05cc8b35fc13',1,'cutlass::platform::is_base_of_helper::dummy']]], + ['operator_20value_5ftype',['operator value_type',['../structcutlass_1_1platform_1_1integral__constant.html#a55d25116387f1c6d978462b1d245d675',1,'cutlass::platform::integral_constant']]], + 
['operator_21_3d',['operator!=',['../structcutlass_1_1Coord.html#a7fb46873e8f3cf38212703d35bd36995',1,'cutlass::Coord::operator!=()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a3d06715a77740034697686a7977cb685',1,'cutlass::PredicateVector::ConstIterator::operator!=()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a08cb4d1395b88a4451fbb1a27e010887',1,'cutlass::PredicateVector::Iterator::operator!=()'],['../namespacecutlass_1_1platform.html#a248f49adf09654d2cd04bd2760ab2566',1,'cutlass::platform::operator!=()']]], + ['operator_28_29',['operator()',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html#ab8adb983c0573a0015469f40a75287be',1,'cutlass::gemm::GemmGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html#abaf0d4459a64b3e9533758b59600bd52',1,'cutlass::gemm::GemmGlobalTileCdTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html#a1e357fe5bc1daef333e6be776a21a2ca',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html#a4e35f0b2ca63a6b981230b73f843f726',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html#a51a325b435b9a53effaa003b3670e410',1,'cutlass::gemm::GemmSharedLoadTileATraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html#a5b4a635a521364357386259b0f84c0ba',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html#a4f9cca16303ac9ae29a0eaa11dcc23b6',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html#ace1b936cab289c6884e673312283d422
',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html#a9fc1ca09733113f80fe5fe45db3d9b81',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html#a1228edf6cc0f81af520dc77c8792b94c',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html#ad7537f8b30ee6913cf4afa1d3c054e68',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::ThreadOffset::operator()()'],['../structcutlass_1_1TiledThreadOffset.html#a7290b6ca9ef0bede634f69bd05450fa2',1,'cutlass::TiledThreadOffset::operator()()'],['../structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html#a0e4edffb19218ccbf77995f6d20df000',1,'cutlass::TileTraitsWarpRake::ThreadOffset::operator()()'],['../structcutlass_1_1platform_1_1plus.html#a3bf1e5147df4287bf58ad8f11ea0d98c',1,'cutlass::platform::plus::operator()()'],['../structcutlass_1_1platform_1_1less.html#adfb49ee70a700a8483c70b4b353f6bc5',1,'cutlass::platform::less::operator()()'],['../structcutlass_1_1platform_1_1greater.html#a8d56cf343dd33acebe19d0b51abe3978',1,'cutlass::platform::greater::operator()()'],['../structcutlass_1_1platform_1_1integral__constant.html#a5271a533526a535ae8b783c736252f18',1,'cutlass::platform::integral_constant::operator()()'],['../structcutlass_1_1platform_1_1default__delete.html#a59e6e3cc95685ac34fa6f9cf301b3a15',1,'cutlass::platform::default_delete::operator()()'],['../structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html#a16c5595a5aec7d7ee34e38bef4a66c87',1,'cutlass::platform::default_delete< T[]>::operator()()']]], + 
['operator_2a',['operator*',['../structcutlass_1_1Coord.html#a8e4f7df55a75d040cf50cf9984c04c8a',1,'cutlass::Coord::operator*()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#abbc2bceb6cf8d7f168b8a00eb48c0946',1,'cutlass::PredicateVector::ConstIterator::operator*()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a049b568e0f5de011ee76ce79bcedbab4',1,'cutlass::PredicateVector::Iterator::operator*()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a78016158f99dd87e822a2a2cbd4cec78',1,'cutlass::PredicateVector::TrivialIterator::operator*()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a45a3cb6d8641a6130991d56e84cbb38b',1,'cutlass::platform::unique_ptr::operator*()']]], + ['operator_2a_3d',['operator*=',['../structcutlass_1_1Coord.html#a282b6cc9ac8b2f72720c252791155aad',1,'cutlass::Coord']]], + ['operator_2b',['operator+',['../structcutlass_1_1Coord.html#a3dfc4ce4191097b6c3268696f2a45ef5',1,'cutlass::Coord::operator+()'],['../classcutlass_1_1TensorRef.html#aa7b80d225c01c9dc12aafc515cf15842',1,'cutlass::TensorRef::operator+()']]], + 
['operator_2b_2b',['operator++',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a10ee4bb2f206432aa5ee1a83cb046b70',1,'cutlass::PredicateVector::ConstIterator::operator++()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a977a99af3166a58d5bc5a613a1abe7d5',1,'cutlass::PredicateVector::ConstIterator::operator++(int)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a7dddc0a6b5c958156beef29bedfd1bd3',1,'cutlass::PredicateVector::Iterator::operator++()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a6c7333ad14d545cafc707e78752bf1e3',1,'cutlass::PredicateVector::Iterator::operator++(int)'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#ad24e9b451064e99fb19955f772c30e6a',1,'cutlass::PredicateVector::TrivialIterator::operator++()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#aa35b9165920b83b9a5a888df83925051',1,'cutlass::PredicateVector::TrivialIterator::operator++(int)']]], + ['operator_2b_3d',['operator+=',['../structcutlass_1_1Coord.html#aeb209486943fa9d42911325b16e49e09',1,'cutlass::Coord']]], + ['operator_2d',['operator-',['../structcutlass_1_1Coord.html#acc510511ffb52bed7f6a52f14b99750d',1,'cutlass::Coord::operator-()'],['../classcutlass_1_1TensorRef.html#a3843ccfd1d097f25eff45dc159709938',1,'cutlass::TensorRef::operator-()']]], + ['operator_2d_2d',['operator--',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a2763012a9284e97650b14e20c5668286',1,'cutlass::PredicateVector::ConstIterator::operator--()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a2910a714d34a688b8ea560ea2933436b',1,'cutlass::PredicateVector::ConstIterator::operator--(int)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a69fb5b24eeb43331b7401768e8584e61',1,'cutlass::PredicateVector::Iterator::operator--()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#aad709a11f43b84c88e3ce3a0394f8e8a',1,'cutlass::PredicateVector::Iterator::operator--(int)']]], + 
['operator_2d_3d',['operator-=',['../structcutlass_1_1Coord.html#ac1795ec2a5890d8a39840567a4bea88e',1,'cutlass::Coord']]], + ['operator_2d_3e',['operator->',['../classcutlass_1_1platform_1_1unique__ptr.html#afa52edcaef23461ce1f9c1dac349c24b',1,'cutlass::platform::unique_ptr']]], + ['operator_2f',['operator/',['../structcutlass_1_1Coord.html#a87f485be079fa68bcf576da4d56f0ece',1,'cutlass::Coord']]], + ['operator_2f_3d',['operator/=',['../structcutlass_1_1Coord.html#abe91e59962ef0d73aec9c14824f64ecc',1,'cutlass::Coord']]], + ['operator_3c',['operator<',['../namespacecutlass_1_1platform.html#a412dbdbc678ecd12b55fcad4ef4155bd',1,'cutlass::platform']]], + ['operator_3c_3c',['operator<<',['../core__io_8h.html#a4a0d84a2a19a11549b87a2328d58690d',1,'core_io.h']]], + ['operator_3c_3d',['operator<=',['../namespacecutlass_1_1platform.html#a41d573133357bd555f78d33afc1152d3',1,'cutlass::platform']]], + ['operator_3d',['operator=',['../classcutlass_1_1TensorView.html#aa9e9e19f35ce3111f64b763ca49b51ef',1,'cutlass::TensorView']]], + ['operator_3d_3d',['operator==',['../structcutlass_1_1Coord.html#acfa94aabd0c9a71ee994ca479d5f515f',1,'cutlass::Coord::operator==()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#aa2d03d88ac23051803d010f78157c357',1,'cutlass::PredicateVector::ConstIterator::operator==()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a5c5266fcef67c7b263682c4bc4a5000e',1,'cutlass::PredicateVector::Iterator::operator==()'],['../namespacecutlass_1_1platform.html#ab9b8306ae9dc21fa646c49b68fa8e197',1,'cutlass::platform::operator==()']]], + ['operator_3e',['operator>',['../namespacecutlass_1_1platform.html#a9e8e698d40b8df881991fde9ba2a1b12',1,'cutlass::platform']]], + ['operator_3e_3d',['operator>=',['../namespacecutlass_1_1platform.html#ab0f21e67c0a4b5c6952042b502c6816f',1,'cutlass::platform']]], + ['operator_5b_5d',['operator[]',['../structcutlass_1_1Coord.html#ab7fc89de3ccd7096ab275fb5dd40104c',1,'cutlass::Coord::operator[](int 
dim)'],['../structcutlass_1_1Coord.html#a6eeab0a1686ee25389e1bd017c5f03ae',1,'cutlass::Coord::operator[](int dim) const'],['../structcutlass_1_1Fragment.html#a99fef5f3093b2df50905ab13819b67a0',1,'cutlass::Fragment::operator[](int i)'],['../structcutlass_1_1Fragment.html#a75f51bb6ca84615076aab42ac9d42592',1,'cutlass::Fragment::operator[](int i) const'],['../structcutlass_1_1FragmentIterator.html#a83bb6a3ed588e2d890bf986665d2b7bb',1,'cutlass::FragmentIterator::operator[](int i) const'],['../structcutlass_1_1FragmentIterator.html#a3bd2a9d8467f8db02ca3a01ae0c11ad7',1,'cutlass::FragmentIterator::operator[](int i)'],['../structcutlass_1_1FragmentConstIterator.html#af16f2aa14ff424b038a393b683c4783e',1,'cutlass::FragmentConstIterator::operator[]()'],['../structcutlass_1_1PredicateVector.html#a840985438ac8306ec680eb20edd4e5c5',1,'cutlass::PredicateVector::operator[]()'],['../classcutlass_1_1TensorRef.html#a6a2aa88ed77557c089a165da0df1e974',1,'cutlass::TensorRef::operator[](Coord< Rank > const &coord) const'],['../classcutlass_1_1TensorRef.html#a34e97ab2190b4681d1c1199186d66f1c',1,'cutlass::TensorRef::operator[](int idx) const'],['../classcutlass_1_1TensorView.html#a7fe7e44e15fd1ac58fb55edf72e8fb23',1,'cutlass::TensorView::operator[]()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a5c7a204af07a7d325b0a8303e199a50d',1,'cutlass::platform::unique_ptr::operator[]()'],['../unioncutlass_1_1Vector.html#a250860c921c94a6077344f9e11bf5b02',1,'cutlass::Vector::operator[](uint32_t i) const'],['../unioncutlass_1_1Vector.html#a44cc27bf8a7b789b4ae8538155a50156',1,'cutlass::Vector::operator[](uint32_t i)'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#ab0516cef8949f5998b5251cc6b6db683',1,'cutlass::Vector< half, kLanes_ >::operator[](uint32_t i) const'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a8ade80e040264fbd669d3f15c249884e',1,'cutlass::Vector< half, kLanes_ >::operator[](uint32_t i)']]], + 
['operator_7c_3d',['operator|=',['../structcutlass_1_1PredicateVector.html#aab9de134132c62de1c062ca57582cdbc',1,'cutlass::PredicateVector']]], + ['outputfragment',['OutputFragment',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a8ef69ab595489e142911e8e240fb405a',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::OutputFragment()'],['../structcutlass_1_1Copy.html#a545be6c284d625b0841a10cc9126e14a',1,'cutlass::Copy::OutputFragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a9c04f0b0eb0293325f661b72168d4fa8',1,'cutlass::gemm::HgemmSwizzle::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a3d89bfc0d94cd695cbe4a61859e5e553',1,'cutlass::gemm::IgemmFloatToInt8Converter::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a66ac385a1cd771b95f70ee36cd74e8f7',1,'cutlass::gemm::IgemmInt8ToFloatConverter::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#ac0a4e31e95f8e0c77ae087284bb02ff8',1,'cutlass::gemm::IgemmSwizzle::OutputFragment()']]], + 
['outputtile',['OutputTile',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a92a135fac401d43a8d2f14982d90274b',1,'cutlass::gemm::GemmEpilogue::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#aed1bd9df5ff579ba3e36ae5ba781c075',1,'cutlass::gemm::GemmEpilogueTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ac30a062bed1a65e45961c4f301b69101',1,'cutlass::gemm::GemmEpilogueTraitsHelper::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ad52b81080731ee1f0d3c2c7eaba6f60d',1,'cutlass::gemm::GemmSharedStoreTileDTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#acb16feebdcad5bbebe9d4d3383c37899',1,'cutlass::gemm::GemmSharedLoadTileDTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a53450f4d7444d6a4c0d2353496c0a4fd',1,'cutlass::gemm::GemmConfig::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a97d7ee63e5d180410b370f095648f367',1,'cutlass::gemm::GemmTraits::OutputTile()']]] +]; diff --git a/docs/generated-html/search/all_f.html b/docs/generated-html/search/all_f.html new file mode 100644 index 00000000..89fa15a6 --- /dev/null +++ b/docs/generated-html/search/all_f.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_f.js b/docs/generated-html/search/all_f.js new file mode 100644 index 00000000..05e5e922 --- /dev/null +++ b/docs/generated-html/search/all_f.js @@ -0,0 +1,26 @@ +var searchData= +[ + ['pad',['pad',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html',1,'cutlass::platform::alignment_of']]], + ['params',['Params',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html',1,'cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params'],['../structcutlass_1_1TileIteratorBase_1_1Params.html',1,'cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html',1,'cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params'],['../structcutlass_1_1TileLoadIterator_1_1Params.html',1,'cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1TileStoreIterator_1_1Params.html',1,'cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ 
>::Params'],['../structcutlass_1_1gemm_1_1Gemm_1_1Params.html',1,'cutlass::gemm::Gemm< GemmTraits_ >::Params'],['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html',1,'cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html',1,'cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html',1,'cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ae5209fa80705442693833c63d535161e',1,'cutlass::gemm::GemmEpilogue::Params()'],['../structcutlass_1_1gemm_1_1Gemm.html#a3c292637ab0ec8e73856d0cf6efb6da2',1,'cutlass::gemm::Gemm::params()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a81b028a18df51d3caa1b0ba0c990e362',1,'cutlass::gemm::GemmEpilogue::params()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ab8c79cb1a8157dd00429c93cb4a41322',1,'cutlass::gemm::GemmGlobalIteratorAb::params()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ac368b1ea1c5ad2209a6ac6bec597600f',1,'cutlass::gemm::GemmGlobalIteratorCd::params()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a0ad4218ad2c10641379b236473e79e84',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::params()'],['../structcutlass_1_1TileLoadIterator.html#aaafe35622751532971c1b7efc54c888b',1,'cutlass::TileLoadIterator::params()'],['../structcutlass_1_1TileStoreIterator.html#a5e6c00b99e0f752137b07f7059f6ee0f',1,'cutlass::TileStoreIterator::params()']]], + ['platform_2eh',['platform.h',['../platform_8h.html',1,'']]], + ['plus',['plus',['../structcutlass_1_1platform_1_1plus.html',1,'cutlass::platform']]], + 
['pointer',['pointer',['../classcutlass_1_1platform_1_1unique__ptr.html#ab6ce60d03d11b269c1e151dfa7c696f9',1,'cutlass::platform::unique_ptr::pointer()'],['../structcutlass_1_1FragmentIterator.html#af667793926cdb24d701eb75e0345bbd6',1,'cutlass::FragmentIterator::pointer()'],['../structcutlass_1_1FragmentConstIterator.html#aee37f8ea06127b94a304bb776945509b',1,'cutlass::FragmentConstIterator::pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#ad764f98e770d4685006e6888214dcd4d',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::pointer()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#aa42c4e7419308926b925909e6a5c719d',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::pointer()'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#a6608f7027994aaebdefd004fe94153d9',1,'cutlass::TileLoadIterator::Params::pointer()'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#a6bbadae6b13aef8f31a77cacd88b068b',1,'cutlass::TileStoreIterator::Params::pointer()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#adcbf24c1b7f45ab5fe8f3ad94154b4d1',1,'cutlass::gemm::GlobalLoadStreamBase::Pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a3ff6f630b6b317ace1cf6e13fdf3a0cd',1,'cutlass::gemm::GemmGlobalTileTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a3abcfa68ae9904a13195d32d6e6c4bc6',1,'cutlass::gemm::GemmGlobalIteratorCd::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a5be0c995c57faafaad7ae55ae015fc00',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#ab883c2a8b90262152faca9cabe515dc4',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#adc4946dfbe914140c6852d0c05b30864',1,'cutlass::gemm::GemmSharedLoadTileATraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#afafb3d9ae47
0c8ef56ec4ca5e66e2182',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a20471c2f569c28538dad8a220ab25624',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a1e72b69cf2147e4d194893a64417b920',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Pointer()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a84a73da2a07210fcfad10853b941c85e',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Pointer()'],['../structcutlass_1_1TileLoadIterator.html#a5a179e148ccd770e1703f288624fa9b8',1,'cutlass::TileLoadIterator::Pointer()']]], + ['predicate_5finc_5fadvance',['predicate_inc_advance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a2b5d2b02d241e89677c41eb658ace129',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#aa0367d016549cce6bd896bae364fc248',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_inc_advance()']]], + ['predicate_5finc_5fh',['predicate_inc_h',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a36afe18f94aacd0746c8946866371d3c',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a5b8177a936ba30a3d68ca238aaf76ff6',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_inc_h()']]], + ['predicate_20iterator_20concept',['Predicate Iterator Concept',['../group__predicate__iterator__concept.html',1,'']]], + ['predicate_5foffset',['predicate_offset',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a3e8f6cf08d23318f3e3263b55cf3b84a',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_offset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a38f13119cf3111e84914f1bef6f5d985',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_offset()']]], + 
['predicate_20tile_20adapter_20concept',['Predicate Tile Adapter Concept',['../group__predicate__tile__adapter.html',1,'']]], + ['predicate_5fvector_2eh',['predicate_vector.h',['../predicate__vector_8h.html',1,'']]], + ['predicate_20vector_20concept',['Predicate Vector Concept',['../group__predicate__vector__concept.html',1,'']]], + ['predicates',['predicates',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#af323c9db74f0de3376edd35eb377bc9c',1,'cutlass::gemm::GemmGlobalIteratorAb::predicates()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ad23e6224e37ec1d13dc237ce8ec6e977',1,'cutlass::gemm::GemmGlobalIteratorCd::predicates()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af3c9d62554b1d311d82ba89e09cdd3fa',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::predicates()']]], + ['predicatetileadapter',['PredicateTileAdapter',['../structcutlass_1_1PredicateTileAdapter.html',1,'cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >'],['../structcutlass_1_1PredicateTileAdapter.html#a4c9eb6c6498ccf117427a3b35f7ce5ea',1,'cutlass::PredicateTileAdapter::PredicateTileAdapter()']]], + ['predicatevector',['PredicateVector',['../structcutlass_1_1PredicateVector.html',1,'cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ 
>'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a3dd74f6e12339a87c0eb8f75fbdc7b9c',1,'cutlass::gemm::GemmGlobalIteratorAb::PredicateVector()'],['../structcutlass_1_1PredicateTileAdapter.html#a72669300eb0bd18ea8124f780862a0e4',1,'cutlass::PredicateTileAdapter::PredicateVector()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#ab9143288811a1262f7007f1b76b32e8f',1,'cutlass::ConstPredicateTileAdapter::PredicateVector()'],['../structcutlass_1_1TileIteratorBase.html#a7ab46a9210b421d32af4d1394892cfd5',1,'cutlass::TileIteratorBase::PredicateVector()'],['../structcutlass_1_1TileLoadIterator.html#a64ae02b44f275ef2f016949aec769328',1,'cutlass::TileLoadIterator::PredicateVector()'],['../structcutlass_1_1TileStoreIterator.html#a5aa507eaeb63951f8e69fb223ec41809',1,'cutlass::TileStoreIterator::PredicateVector()'],['../structcutlass_1_1PredicateVector.html#aec1201df19c0ed0516810a3f19353c21',1,'cutlass::PredicateVector::PredicateVector()']]], + ['predicatevector_3c_20base_3a_3aiterations_3a_3akw_20_3e',['PredicateVector< Base::Iterations::kW >',['../structcutlass_1_1PredicateVector.html',1,'cutlass']]], + ['predicatevector_3c_20shapecount_3c_20typename_20base_3a_3aiterations_20_3e_3a_3akcount_20_3e',['PredicateVector< ShapeCount< typename Base::Iterations >::kCount >',['../structcutlass_1_1PredicateVector.html',1,'cutlass']]], + ['project',['project',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html#ae91b2350374f1734a30cbed45e14b8e3',1,'cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html#a0f1579013f56fe16ebc147271f163c3c',1,'cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html#af2a323461334a6b55b95074a1973d250',1,'cutlass::gemm::ProjectOperand< GemmOperand::kC, true 
>::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html#ace04040ccb13af5f9a283ca80ffe93d1',1,'cutlass::gemm::ProjectOperand< GemmOperand::kD, true >::project()']]], + ['projectoperand',['ProjectOperand',['../structcutlass_1_1gemm_1_1ProjectOperand.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3aka_2c_20kstrided_20_3e',['ProjectOperand< GemmOperand::kA, Kstrided >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3akb_2c_20kstrided_20_3e',['ProjectOperand< GemmOperand::kB, Kstrided >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3akc_2c_20true_20_3e',['ProjectOperand< GemmOperand::kC, true >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3akd_2c_20true_20_3e',['ProjectOperand< GemmOperand::kD, true >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/classes_0.html b/docs/generated-html/search/classes_0.html new file mode 100644 index 00000000..e935fdf7 --- /dev/null +++ b/docs/generated-html/search/classes_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_0.js b/docs/generated-html/search/classes_0.js new file mode 100644 index 00000000..bf7a2109 --- /dev/null +++ b/docs/generated-html/search/classes_0.js @@ -0,0 +1,22 @@ +var searchData= +[ + ['aligned_5fchunk',['aligned_chunk',['../structcutlass_1_1platform_1_1aligned__chunk.html',1,'cutlass::platform']]], + ['aligned_5fstorage',['aligned_storage',['../structcutlass_1_1platform_1_1aligned__storage.html',1,'cutlass::platform']]], + ['alignedstruct',['AlignedStruct',['../structcutlass_1_1AlignedStruct.html',1,'cutlass']]], + ['alignedstruct_3c_20kvectorsize_20_3e',['AlignedStruct< kVectorSize >',['../structcutlass_1_1AlignedStruct.html',1,'cutlass']]], + ['alignment_5fof',['alignment_of',['../structcutlass_1_1platform_1_1alignment__of.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20const_20value_5ft_20_3e',['alignment_of< const value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20const_20volatile_20value_5ft_20_3e',['alignment_of< const volatile value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20double2_20_3e',['alignment_of< double2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20double4_20_3e',['alignment_of< double4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20float4_20_3e',['alignment_of< float4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20int4_20_3e',['alignment_of< int4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20long4_20_3e',['alignment_of< long4 
>',['../structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20longlong2_20_3e',['alignment_of< longlong2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20longlong4_20_3e',['alignment_of< longlong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20uint4_20_3e',['alignment_of< uint4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20ulong4_20_3e',['alignment_of< ulong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20ulonglong2_20_3e',['alignment_of< ulonglong2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20ulonglong4_20_3e',['alignment_of< ulonglong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20volatile_20value_5ft_20_3e',['alignment_of< volatile value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/classes_1.html b/docs/generated-html/search/classes_1.html new file mode 100644 index 00000000..3df6e80a --- /dev/null +++ b/docs/generated-html/search/classes_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_1.js b/docs/generated-html/search/classes_1.js new file mode 100644 index 00000000..1f282923 --- /dev/null +++ b/docs/generated-html/search/classes_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['bool_5fconstant',['bool_constant',['../structcutlass_1_1platform_1_1bool__constant.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/classes_10.html b/docs/generated-html/search/classes_10.html new file mode 100644 index 00000000..0477a266 --- /dev/null +++ b/docs/generated-html/search/classes_10.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_10.js b/docs/generated-html/search/classes_10.js new file mode 100644 index 00000000..348d5341 --- /dev/null +++ b/docs/generated-html/search/classes_10.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['unique_5fptr',['unique_ptr',['../classcutlass_1_1platform_1_1unique__ptr.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/classes_11.html b/docs/generated-html/search/classes_11.html new file mode 100644 index 00000000..6bbc0d14 --- /dev/null +++ b/docs/generated-html/search/classes_11.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_11.js b/docs/generated-html/search/classes_11.js new file mode 100644 index 00000000..f0fc1493 --- /dev/null +++ b/docs/generated-html/search/classes_11.js @@ -0,0 +1,10 @@ +var searchData= +[ + ['vector',['Vector',['../unioncutlass_1_1Vector.html',1,'cutlass']]], + ['vector_3c_20half_2c_20klanes_5f_20_3e',['Vector< half, kLanes_ >',['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html',1,'cutlass']]], + ['vectorize',['Vectorize',['../structcutlass_1_1Vectorize.html',1,'cutlass']]], + ['vectorize_3c_20element_5f_2c_201_20_3e',['Vectorize< Element_, 1 >',['../structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html',1,'cutlass']]], + ['vectortraits',['VectorTraits',['../structcutlass_1_1VectorTraits.html',1,'cutlass']]], + ['vectortraits_3c_20vector_3c_20t_2c_20lanes_20_3e_20_3e',['VectorTraits< Vector< T, Lanes > >',['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html',1,'cutlass']]], + ['vectortraits_3c_20vector_3c_20t_2c_20lanes_20_3e_20const_20_3e',['VectorTraits< Vector< T, Lanes > const >',['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_12.html b/docs/generated-html/search/classes_12.html new file mode 100644 index 00000000..c889f6d6 --- /dev/null +++ b/docs/generated-html/search/classes_12.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_12.js b/docs/generated-html/search/classes_12.js new file mode 100644 index 00000000..dadc9781 --- /dev/null +++ b/docs/generated-html/search/classes_12.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['wmmagemmglobaliteratorcd',['WmmaGemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html',1,'cutlass::gemm']]], + ['wmmagemmglobaliteratorcdtraits',['WmmaGemmGlobalIteratorCdTraits',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/classes_2.html b/docs/generated-html/search/classes_2.html new file mode 100644 index 00000000..028694ff --- /dev/null +++ b/docs/generated-html/search/classes_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_2.js b/docs/generated-html/search/classes_2.js new file mode 100644 index 00000000..ee2cf165 --- /dev/null +++ b/docs/generated-html/search/classes_2.js @@ -0,0 +1,23 @@ +var searchData= +[ + ['clearaccumulators',['ClearAccumulators',['../structcutlass_1_1gemm_1_1ClearAccumulators.html',1,'cutlass::gemm']]], + ['computeoffsetfromshape',['ComputeOffsetFromShape',['../structcutlass_1_1ComputeOffsetFromShape.html',1,'cutlass']]], + ['computeoffsetfromshape_3c_20shape_3c_201_2c_20ksh_5f_2c_20ksw_5f_2c_201_20_3e_20_3e',['ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >',['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromshape_3c_20shape_3c_201_2c_20ksh_5f_2c_20ksw_5f_2c_20ksc_5f_20_3e_20_3e',['ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >',['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromstrides',['ComputeOffsetFromStrides',['../structcutlass_1_1ComputeOffsetFromStrides.html',1,'cutlass']]], + ['computeoffsetfromstrides_3c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_201_20_3e_20_3e',['ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >',['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromstrides_3c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_20s_5fc_5f_20_3e_20_3e',['ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >',['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html',1,'cutlass']]], + ['computethreadoffsetfromstrides',['ComputeThreadOffsetFromStrides',['../structcutlass_1_1ComputeThreadOffsetFromStrides.html',1,'cutlass']]], + 
['computethreadoffsetfromstrides_3c_20shape_3c_201_2c_20t_5fh_5f_2c_20t_5fw_5f_2c_201_20_3e_2c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_201_20_3e_20_3e',['ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >',['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html',1,'cutlass']]], + ['computethreadoffsetfromstrides_3c_20shape_3c_201_2c_20t_5fh_5f_2c_20t_5fw_5f_2c_20t_5fc_5f_20_3e_2c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_20s_5fc_5f_20_3e_20_3e',['ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >',['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html',1,'cutlass']]], + ['conditional',['conditional',['../structcutlass_1_1platform_1_1conditional.html',1,'cutlass::platform']]], + ['conditional_3c_20false_2c_20t_2c_20f_20_3e',['conditional< false, T, F >',['../structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html',1,'cutlass::platform']]], + ['constiterator',['ConstIterator',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html',1,'cutlass::PredicateVector']]], + ['constpredicatetileadapter',['ConstPredicateTileAdapter',['../structcutlass_1_1ConstPredicateTileAdapter.html',1,'cutlass']]], + ['convert',['Convert',['../structcutlass_1_1Convert.html',1,'cutlass']]], + ['convert_3c_20fragment_3c_20inputscalar_5f_2c_20kscalars_5f_20_3e_2c_20fragment_3c_20outputscalar_5f_2c_20kscalars_5f_20_3e_20_3e',['Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html',1,'cutlass']]], + ['coord',['Coord',['../structcutlass_1_1Coord.html',1,'cutlass']]], + ['coord_3c_204_20_3e',['Coord< 4 >',['../structcutlass_1_1Coord.html',1,'cutlass']]], + 
['coord_3c_20rank_20_3e',['Coord< Rank >',['../structcutlass_1_1Coord.html',1,'cutlass']]], + ['copy',['Copy',['../structcutlass_1_1Copy.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_3.html b/docs/generated-html/search/classes_3.html new file mode 100644 index 00000000..2b1abe38 --- /dev/null +++ b/docs/generated-html/search/classes_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_3.js b/docs/generated-html/search/classes_3.js new file mode 100644 index 00000000..ec9fedbd --- /dev/null +++ b/docs/generated-html/search/classes_3.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['default_5fdelete',['default_delete',['../structcutlass_1_1platform_1_1default__delete.html',1,'cutlass::platform']]], + ['default_5fdelete_3c_20t_5b_5d_3e',['default_delete< T[]>',['../structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html',1,'cutlass::platform']]], + ['dgemmconfig',['DgemmConfig',['../structcutlass_1_1gemm_1_1DgemmConfig.html',1,'cutlass::gemm']]], + ['dgemmtraits',['DgemmTraits',['../structcutlass_1_1gemm_1_1DgemmTraits.html',1,'cutlass::gemm']]], + ['divide_5fassert',['divide_assert',['../structcutlass_1_1divide__assert.html',1,'cutlass']]], + ['dummy',['dummy',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html',1,'cutlass::platform::is_base_of_helper']]] +]; diff --git a/docs/generated-html/search/classes_4.html b/docs/generated-html/search/classes_4.html new file mode 100644 index 00000000..87352149 --- /dev/null +++ b/docs/generated-html/search/classes_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_4.js b/docs/generated-html/search/classes_4.js new file mode 100644 index 00000000..5fb3b192 --- /dev/null +++ b/docs/generated-html/search/classes_4.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['enable_5fif',['enable_if',['../structcutlass_1_1platform_1_1enable__if.html',1,'cutlass::platform']]], + ['enable_5fif_3c_20false_2c_20t_20_3e',['enable_if< false, T >',['../structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html',1,'cutlass::platform']]], + ['extent',['Extent',['../structcutlass_1_1Extent.html',1,'cutlass']]], + ['extent_3c_20vector_3c_20t_2c_20lanes_20_3e_20_3e',['Extent< Vector< T, Lanes > >',['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html',1,'cutlass']]], + ['extent_3c_20vector_3c_20t_2c_20lanes_20_3e_20const_20_3e',['Extent< Vector< T, Lanes > const >',['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_5.html b/docs/generated-html/search/classes_5.html new file mode 100644 index 00000000..ba8b1c69 --- /dev/null +++ b/docs/generated-html/search/classes_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_5.js b/docs/generated-html/search/classes_5.js new file mode 100644 index 00000000..6202ed09 --- /dev/null +++ b/docs/generated-html/search/classes_5.js @@ -0,0 +1,14 @@ +var searchData= +[ + ['fragment',['Fragment',['../structcutlass_1_1Fragment.html',1,'cutlass']]], + ['fragmentconstiterator',['FragmentConstIterator',['../structcutlass_1_1FragmentConstIterator.html',1,'cutlass']]], + ['fragmentiterator',['FragmentIterator',['../structcutlass_1_1FragmentIterator.html',1,'cutlass']]], + ['fragmentload',['FragmentLoad',['../structcutlass_1_1FragmentLoad.html',1,'cutlass']]], + ['fragmentload_3c_20iteratorfragment_3a_3akscalar_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html',1,'cutlass']]], + ['fragmentload_3c_20iteratorfragment_3a_3akwmmamatrix_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html',1,'cutlass']]], + ['fragmentmultiplyadd',['FragmentMultiplyAdd',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html',1,'cutlass::gemm']]], + ['fragmentmultiplyadd_3c_20half_20_3e',['FragmentMultiplyAdd< half >',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html',1,'cutlass::gemm']]], + ['fragmentstore',['FragmentStore',['../structcutlass_1_1FragmentStore.html',1,'cutlass']]], + ['fragmentstore_3c_20iteratorfragment_3a_3akscalar_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentStore< 
IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html',1,'cutlass']]], + ['fragmentstore_3c_20iteratorfragment_3a_3akwmmamatrix_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_6.html b/docs/generated-html/search/classes_6.html new file mode 100644 index 00000000..f5850938 --- /dev/null +++ b/docs/generated-html/search/classes_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_6.js b/docs/generated-html/search/classes_6.js new file mode 100644 index 00000000..bc65aaca --- /dev/null +++ b/docs/generated-html/search/classes_6.js @@ -0,0 +1,49 @@ +var searchData= +[ + ['gemm',['Gemm',['../structcutlass_1_1gemm_1_1Gemm.html',1,'cutlass::gemm']]], + ['gemmconfig',['GemmConfig',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20double_2c_20double_2c_20double_2c_20double_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20double_2c_20double_2c_20double_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_202_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_202_2c_201_2c_202_2c_201_2c_202_20_3e',['GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20float_2c_20float_2c_20float_2c_20float_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20float_2c_20float_2c_20float_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_204_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_204_2c_201_2c_204_2c_201_2c_202_20_3e',['GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + 
['gemmconfig_3c_20half_2c_20half_2c_20half_2c_20half_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20half_2c_20half_2c_20half_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_208_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_208_2c_202_2c_208_2c_202_2c_202_20_3e',['GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20int8_5ft_2c_20int8_5ft_2c_20int8_5ft_2c_20int8_5ft_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_204_2c_202_20_3e',['GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20int8_5ft_2c_20int8_5ft_2c_20scalard_5f_2c_20scalard_5f_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_2016_2c_201_2c_204_2c_201_2c_202_20_3e',['GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmdesc',['GemmDesc',['../structcutlass_1_1gemm_1_1GemmDesc.html',1,'cutlass::gemm']]], + ['gemmepilogue',['GemmEpilogue',['../structcutlass_1_1gemm_1_1GemmEpilogue.html',1,'cutlass::gemm']]], + 
['gemmepiloguetraits',['GemmEpilogueTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['gemmepiloguetraits_3c_20gemmconfig_5f_3a_3aoutputtile_2c_20gemmconfig_5f_3a_3aaccumulators_2c_20helper_5f_3a_3agloballoaditeratorc_2c_20helper_5f_3a_3aglobaltransformerc_2c_20helper_5f_3a_3aglobaltransformerd_2c_20helper_5f_3a_3aglobalstoreiteratord_2c_20helper_5f_3a_3asharedstoreiteratord_2c_20helper_5f_3a_3asharedstoretransformerd_2c_20helper_5f_3a_3asharedloaditeratord_2c_20helper_5f_3a_3aiterations_2c_20helper_5f_3a_3adelta_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['gemmepiloguetraits_3c_20igemmconfig_5f_3a_3aoutputtile_2c_20igemmconfig_5f_3a_3aaccumulators_2c_20helper_5f_3a_3agloballoaditeratorc_2c_20helper_5f_3a_3aglobaltransformerc_2c_20helper_5f_3a_3aglobaltransformerd_2c_20helper_5f_3a_3aglobalstoreiteratord_2c_20helper_5f_3a_3asharedstoreiteratord_2c_20helper_5f_3a_3asharedstoretransformerd_2c_20helper_5f_3a_3asharedloaditeratord_2c_20helper_5f_3a_3aiterations_2c_20helper_5f_3a_3adelta_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm']]], + 
['gemmepiloguetraitshelper',['GemmEpilogueTraitsHelper',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + ['gemmepiloguetraitshelper_3c_20igemmconfig_5f_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + ['gemmglobaliteratorab',['GemmGlobalIteratorAb',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html',1,'cutlass::gemm']]], + ['gemmglobaliteratorcd',['GemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html',1,'cutlass::gemm']]], + ['gemmglobaltilecdtraits',['GemmGlobalTileCdTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html',1,'cutlass::gemm']]], + ['gemmglobaltiletraits',['GemmGlobalTileTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html',1,'cutlass::gemm']]], + ['gemmglobaltiletraits_3c_20gemmoperand_3a_3akc_2c_20matrixlayout_3a_3akcolumnmajor_2c_20scalar_5f_2c_20tile_5f_2c_20threads_5f_2c_20kaccesssize_5f_20_3e',['GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html',1,'cutlass::gemm']]], + ['gemmmultiplicandtraits',['GemmMultiplicandTraits',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html',1,'cutlass::gemm']]], + ['gemmoperand',['GemmOperand',['../structcutlass_1_1GemmOperand.html',1,'cutlass']]], + ['gemmoperandtraitsab',['GemmOperandTraitsAb',['../structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html',1,'cutlass::gemm']]], + ['gemmsharedloadtileatraits',['GemmSharedLoadTileATraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html',1,'cutlass::gemm']]], + ['gemmsharedloadtilebtraits',['GemmSharedLoadTileBTraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html',1,'cutlass::gemm']]], + 
['gemmsharedloadtiledtraits',['GemmSharedLoadTileDTraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html',1,'cutlass::gemm']]], + ['gemmsharedstoretileabtraits',['GemmSharedStoreTileAbTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html',1,'cutlass::gemm']]], + ['gemmsharedstoretiledtraits',['GemmSharedStoreTileDTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html',1,'cutlass::gemm']]], + ['gemmsharedstorewithskewtileabtraits',['GemmSharedStoreWithSkewTileAbTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera',['GemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera_3c_20klayout_5f_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< kLayout_, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb',['GemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb_3c_20klayout_5f_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< kLayout_, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, 
GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtraits',['GemmTraits',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + ['gemmtraits_3c_20gemmconfig_5f_2c_20helper_5f_3a_3agloballoadstreama_2c_20helper_5f_3a_3agloballoadstreamb_2c_20helper_5f_3a_3asharedloadstreama_2c_20helper_5f_3a_3asharedloadstreamb_2c_20epilogue_5f_2c_20identityblockswizzle_2c_20index_5f_2c_20clearaccumulators_3c_20gemmconfig_5f_3a_3aaccumulators_3a_3aelement_20_3e_20_3e',['GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + 
['gemmtraits_3c_20gemmconfig_5f_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3agloballoadstreama_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3agloballoadstreamb_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3asharedloadstreama_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3asharedloadstreamb_2c_20gemmepilogue_3c_20gemmepiloguetraits_5f_20_3e_2c_20identityblockswizzle_2c_20index_5f_2c_20clearaccumulators_3c_20gemmconfig_5f_3a_3aaccumulators_3a_3aelement_20_3e_20_3e',['GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + 
['gemmtraits_3c_20helper_5f_3a_3agemmconfig_2c_20helper_5f_3a_3agloballoadstreama_2c_20helper_5f_3a_3agloballoadstreamb_2c_20helper_5f_3a_3asharedloadstreama_2c_20helper_5f_3a_3asharedloadstreamb_2c_20helper_5f_3a_3aepilogue_2c_20identityblockswizzle_2c_20index_5f_2c_20helper_5f_3a_3aclearaccumulators_20_3e',['GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + ['getextent',['GetExtent',['../structcutlass_1_1gemm_1_1GetExtent.html',1,'cutlass::gemm']]], + ['getextent_3c_20gemmoperand_3a_3aka_2c_20tile_5f_20_3e',['GetExtent< GemmOperand::kA, Tile_ >',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html',1,'cutlass::gemm']]], + ['getextent_3c_20gemmoperand_3a_3akb_2c_20tile_5f_20_3e',['GetExtent< GemmOperand::kB, Tile_ >',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html',1,'cutlass::gemm']]], + ['globalloadstream',['GlobalLoadStream',['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream'],['../structcutlass_1_1gemm_1_1GlobalLoadStream.html',1,'cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >']]], + ['globalloadstreambase',['GlobalLoadStreamBase',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html',1,'cutlass::gemm']]], + ['greater',['greater',['../structcutlass_1_1platform_1_1greater.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/classes_7.html b/docs/generated-html/search/classes_7.html new file mode 100644 index 00000000..6418529c --- /dev/null +++ b/docs/generated-html/search/classes_7.html @@ 
-0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_7.js b/docs/generated-html/search/classes_7.js new file mode 100644 index 00000000..ab1c551a --- /dev/null +++ b/docs/generated-html/search/classes_7.js @@ -0,0 +1,18 @@ +var searchData= +[ + ['hgemmconfig',['HgemmConfig',['../structcutlass_1_1gemm_1_1HgemmConfig.html',1,'cutlass::gemm']]], + ['hgemmcrosswiseglobaltiletraits',['HgemmCrosswiseGlobalTileTraits',['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html',1,'cutlass::gemm']]], + ['hgemmswizzle',['HgemmSwizzle',['../structcutlass_1_1gemm_1_1HgemmSwizzle.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelpera',['HgemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelpera_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelperb',['HgemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelperb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['hgemmtraits',['HgemmTraits',['../structcutlass_1_1gemm_1_1HgemmTraits.html',1,'cutlass::gemm']]], + ['hgemmtraitshelper',['HgemmTraitsHelper',['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html',1,'cutlass::gemm']]], + ['hgemmtransformera',['HgemmTransformerA',['../structcutlass_1_1gemm_1_1HgemmTransformerA.html',1,'cutlass::gemm']]], + ['hgemmtransformera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ 
>',['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformera_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformerb',['HgemmTransformerB',['../structcutlass_1_1gemm_1_1HgemmTransformerB.html',1,'cutlass::gemm']]], + ['hgemmtransformerb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformerb_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/classes_8.html b/docs/generated-html/search/classes_8.html new file mode 100644 index 00000000..87af6f60 --- /dev/null +++ b/docs/generated-html/search/classes_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_8.js b/docs/generated-html/search/classes_8.js new file mode 100644 index 00000000..b0bdbcd6 --- /dev/null +++ b/docs/generated-html/search/classes_8.js @@ -0,0 +1,77 @@ +var searchData= +[ + ['identity',['Identity',['../structcutlass_1_1Identity.html',1,'cutlass']]], + ['identityblockswizzle',['IdentityBlockSwizzle',['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html',1,'cutlass::gemm']]], + ['igemmconfig',['IgemmConfig',['../structcutlass_1_1gemm_1_1IgemmConfig.html',1,'cutlass::gemm']]], + ['igemmconfig_3c_20outputtile_5f_2c_20int8_5ft_2c_20accumulatorsperthread_5f_20_3e',['IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >',['../structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.html',1,'cutlass::gemm']]], + ['igemmcontiguousglobaltiletraits',['IgemmContiguousGlobalTileTraits',['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html',1,'cutlass::gemm']]], + ['igemmepilogue',['IgemmEpilogue',['../structcutlass_1_1gemm_1_1IgemmEpilogue.html',1,'cutlass::gemm']]], + ['igemmepilogue_3c_20gemmepiloguetraits_5f_2c_20true_20_3e',['IgemmEpilogue< GemmEpilogueTraits_, true >',['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html',1,'cutlass::gemm']]], + ['igemmepiloguescalar',['IgemmEpilogueScalar',['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html',1,'cutlass::gemm']]], + ['igemmepiloguescalar_3c_20int_20_3e',['IgemmEpilogueScalar< int >',['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html',1,'cutlass::gemm']]], + ['igemmepiloguetraits',['IgemmEpilogueTraits',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['igemmepiloguetraitshelper',['IgemmEpilogueTraitsHelper',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + 
['igemmfloattoint8converter',['IgemmFloatToInt8Converter',['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html',1,'cutlass::gemm']]], + ['igemmgloballoadtransformer',['IgemmGlobalLoadTransformer',['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html',1,'cutlass::gemm']]], + ['igemmgloballoadtransformer_3c_20fragment_3c_20int8_5ft_2c_20kelements_5f_20_3e_2c_20float_20_3e',['IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >',['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html',1,'cutlass::gemm']]], + ['igemmglobalstoretransformer',['IgemmGlobalStoreTransformer',['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html',1,'cutlass::gemm']]], + ['igemmglobalstoretransformer_3c_20float_2c_20fragment_3c_20int8_5ft_2c_20kelements_5f_20_3e_20_3e',['IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >',['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html',1,'cutlass::gemm']]], + ['igemmint8tofloatconverter',['IgemmInt8ToFloatConverter',['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html',1,'cutlass::gemm']]], + ['igemmsharedstoretransformer',['IgemmSharedStoreTransformer',['../structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html',1,'cutlass::gemm']]], + ['igemmswizzle',['IgemmSwizzle',['../structcutlass_1_1gemm_1_1IgemmSwizzle.html',1,'cutlass::gemm']]], + ['igemmtiletraitshelpera',['IgemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['igemmtiletraitshelpera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + 
['igemmtiletraitshelperb',['IgemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['igemmtiletraitshelperb_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['igemmtraits',['IgemmTraits',['../structcutlass_1_1gemm_1_1IgemmTraits.html',1,'cutlass::gemm']]], + ['igemmtraitshelper',['IgemmTraitsHelper',['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html',1,'cutlass::gemm']]], + ['igemmtransformera',['IgemmTransformerA',['../structcutlass_1_1gemm_1_1IgemmTransformerA.html',1,'cutlass::gemm']]], + ['igemmtransformera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformera_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformerb',['IgemmTransformerB',['../structcutlass_1_1gemm_1_1IgemmTransformerB.html',1,'cutlass::gemm']]], + ['igemmtransformerb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformerb_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + 
['integral_5fconstant',['integral_constant',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_20v_20_3e',['integral_constant< bool, V >',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5farithmetic_3c_20t_20_3e_3a_3avalue_7c_7cis_5fvoid_3c_20t_20_3e_3a_3avalue_7c_7cis_5fsame_3c_20nullptr_5ft_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5fbase_5fof_5fhelper_3c_20remove_5fcv_3c_20baset_20_3e_3a_3atype_2c_20remove_5fcv_3c_20derivedt_20_3e_3a_3atype_20_3e_3a_3avalue_29_7c_7c_28is_5fsame_3c_20remove_5fcv_3c_20baset_20_3e_3a_3atype_2c_20remove_5fcv_3c_20derivedt_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_base_of_helper< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)||(is_same< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5ffundamental_3c_20t_20_3e_3a_3avalue_7c_7cis_5fpointer_3c_20t_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5fintegral_3c_20t_20_3e_3a_3avalue_7c_7cis_5ffloating_5fpoint_3c_20t_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + 
['integral_5fconstant_3c_20bool_2c_28is_5fsame_3c_20float_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_7c_7cis_5fsame_3c_20double_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28n_20_26_28n_20_2d_201_29_29_3d_3d0_20_3e',['integral_constant< bool,(N &(N - 1))==0 >',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['is_5farithmetic',['is_arithmetic',['../structcutlass_1_1platform_1_1is__arithmetic.html',1,'cutlass::platform']]], + ['is_5fbase_5fof',['is_base_of',['../structcutlass_1_1platform_1_1is__base__of.html',1,'cutlass::platform']]], + ['is_5fbase_5fof_5fhelper',['is_base_of_helper',['../structcutlass_1_1platform_1_1is__base__of__helper.html',1,'cutlass::platform']]], + ['is_5ffloating_5fpoint',['is_floating_point',['../structcutlass_1_1platform_1_1is__floating__point.html',1,'cutlass::platform']]], + ['is_5ffundamental',['is_fundamental',['../structcutlass_1_1platform_1_1is__fundamental.html',1,'cutlass::platform']]], + ['is_5fintegral',['is_integral',['../structcutlass_1_1platform_1_1is__integral.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20char_20_3e',['is_integral< char >',['../structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20const_20t_20_3e',['is_integral< const T >',['../structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20const_20volatile_20t_20_3e',['is_integral< const volatile T >',['../structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20int_20_3e',['is_integral< int 
>',['../structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20long_20_3e',['is_integral< long >',['../structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20long_20long_20_3e',['is_integral< long long >',['../structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20short_20_3e',['is_integral< short >',['../structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20signed_20char_20_3e',['is_integral< signed char >',['../structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20char_20_3e',['is_integral< unsigned char >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20int_20_3e',['is_integral< unsigned int >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20long_20_3e',['is_integral< unsigned long >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20long_20long_20_3e',['is_integral< unsigned long long >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20short_20_3e',['is_integral< unsigned short >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20volatile_20t_20_3e',['is_integral< volatile T >',['../structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fpointer',['is_pointer',['../structcutlass_1_1platform_1_1is__pointer.html',1,'cutlass::platform']]], + 
['is_5fpointer_5fhelper',['is_pointer_helper',['../structcutlass_1_1platform_1_1is__pointer__helper.html',1,'cutlass::platform']]], + ['is_5fpointer_5fhelper_3c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e',['is_pointer_helper< remove_cv< T >::type >',['../structcutlass_1_1platform_1_1is__pointer__helper.html',1,'cutlass::platform']]], + ['is_5fpointer_5fhelper_3c_20t_20_2a_20_3e',['is_pointer_helper< T * >',['../structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html',1,'cutlass::platform']]], + ['is_5fpow2',['is_pow2',['../structcutlass_1_1is__pow2.html',1,'cutlass']]], + ['is_5fsame',['is_same',['../structcutlass_1_1platform_1_1is__same.html',1,'cutlass::platform']]], + ['is_5fsame_3c_20a_2c_20a_20_3e',['is_same< A, A >',['../structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html',1,'cutlass::platform']]], + ['is_5fsame_3c_20void_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e',['is_same< void, remove_cv< T >::type >',['../structcutlass_1_1platform_1_1is__same.html',1,'cutlass::platform']]], + ['is_5ftrivially_5fcopyable',['is_trivially_copyable',['../structcutlass_1_1platform_1_1is__trivially__copyable.html',1,'cutlass::platform']]], + ['is_5fvoid',['is_void',['../structcutlass_1_1platform_1_1is__void.html',1,'cutlass::platform']]], + ['is_5fvolatile',['is_volatile',['../structcutlass_1_1platform_1_1is__volatile.html',1,'cutlass::platform']]], + ['is_5fvolatile_3c_20volatile_20t_20_3e',['is_volatile< volatile T >',['../structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['iterator',['Iterator',['../classcutlass_1_1PredicateVector_1_1Iterator.html',1,'cutlass::PredicateVector']]], + ['iteratoradvance',['IteratorAdvance',['../structcutlass_1_1IteratorAdvance.html',1,'cutlass']]], + ['iteratorfragment',['IteratorFragment',['../structcutlass_1_1IteratorFragment.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_9.html b/docs/generated-html/search/classes_9.html new file mode 100644 
index 00000000..f830ae04 --- /dev/null +++ b/docs/generated-html/search/classes_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_9.js b/docs/generated-html/search/classes_9.js new file mode 100644 index 00000000..cf0d35f6 --- /dev/null +++ b/docs/generated-html/search/classes_9.js @@ -0,0 +1,14 @@ +var searchData= +[ + ['less',['less',['../structcutlass_1_1platform_1_1less.html',1,'cutlass::platform']]], + ['linearscaling',['LinearScaling',['../structcutlass_1_1gemm_1_1LinearScaling.html',1,'cutlass::gemm']]], + ['load',['Load',['../structcutlass_1_1Load.html',1,'cutlass']]], + ['load_3c_20double_2c_202_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Load< double, 2, Memory_, true, 16 >',['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 16 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_204_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 4 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_208_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 8 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html',1,'cutlass']]], + ['log2_5fdown',['log2_down',['../structcutlass_1_1log2__down.html',1,'cutlass']]], + ['log2_5fdown_3c_20n_2c_201_2c_20count_20_3e',['log2_down< N, 1, Count >',['../structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html',1,'cutlass']]], + ['log2_5fup',['log2_up',['../structcutlass_1_1log2__up.html',1,'cutlass']]], + ['log2_5fup_3c_20n_2c_201_2c_20count_20_3e',['log2_up< N, 1, Count >',['../structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_a.html 
b/docs/generated-html/search/classes_a.html new file mode 100644 index 00000000..0fd3b7ac --- /dev/null +++ b/docs/generated-html/search/classes_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_a.js b/docs/generated-html/search/classes_a.js new file mode 100644 index 00000000..0556c78d --- /dev/null +++ b/docs/generated-html/search/classes_a.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['mainloopsharedstorage',['MainLoopSharedStorage',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html',1,'cutlass::gemm::GemmTraits']]], + ['matrixlayout',['MatrixLayout',['../structcutlass_1_1MatrixLayout.html',1,'cutlass']]], + ['memoryspace',['MemorySpace',['../structcutlass_1_1MemorySpace.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_b.html b/docs/generated-html/search/classes_b.html new file mode 100644 index 00000000..886abdfc --- /dev/null +++ b/docs/generated-html/search/classes_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_b.js b/docs/generated-html/search/classes_b.js new file mode 100644 index 00000000..02d4e0ba --- /dev/null +++ b/docs/generated-html/search/classes_b.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['nullptr_5ft',['nullptr_t',['../structcutlass_1_1platform_1_1nullptr__t.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/classes_c.html b/docs/generated-html/search/classes_c.html new file mode 100644 index 00000000..52ec2676 --- /dev/null +++ b/docs/generated-html/search/classes_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_c.js b/docs/generated-html/search/classes_c.js new file mode 100644 index 00000000..5b7a2f99 --- /dev/null +++ b/docs/generated-html/search/classes_c.js @@ -0,0 +1,15 @@ +var searchData= +[ + ['pad',['pad',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html',1,'cutlass::platform::alignment_of']]], + ['params',['Params',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html',1,'cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params'],['../structcutlass_1_1TileIteratorBase_1_1Params.html',1,'cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html',1,'cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params'],['../structcutlass_1_1TileLoadIterator_1_1Params.html',1,'cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1TileStoreIterator_1_1Params.html',1,'cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ 
>::Params'],['../structcutlass_1_1gemm_1_1Gemm_1_1Params.html',1,'cutlass::gemm::Gemm< GemmTraits_ >::Params'],['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html',1,'cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html',1,'cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html',1,'cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params']]], + ['plus',['plus',['../structcutlass_1_1platform_1_1plus.html',1,'cutlass::platform']]], + ['predicatetileadapter',['PredicateTileAdapter',['../structcutlass_1_1PredicateTileAdapter.html',1,'cutlass']]], + ['predicatevector',['PredicateVector',['../structcutlass_1_1PredicateVector.html',1,'cutlass']]], + ['predicatevector_3c_20base_3a_3aiterations_3a_3akw_20_3e',['PredicateVector< Base::Iterations::kW >',['../structcutlass_1_1PredicateVector.html',1,'cutlass']]], + ['predicatevector_3c_20shapecount_3c_20typename_20base_3a_3aiterations_20_3e_3a_3akcount_20_3e',['PredicateVector< ShapeCount< typename Base::Iterations >::kCount >',['../structcutlass_1_1PredicateVector.html',1,'cutlass']]], + ['projectoperand',['ProjectOperand',['../structcutlass_1_1gemm_1_1ProjectOperand.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3aka_2c_20kstrided_20_3e',['ProjectOperand< GemmOperand::kA, Kstrided >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3akb_2c_20kstrided_20_3e',['ProjectOperand< GemmOperand::kB, Kstrided >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3akc_2c_20true_20_3e',['ProjectOperand< GemmOperand::kC, true >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html',1,'cutlass::gemm']]], + 
['projectoperand_3c_20gemmoperand_3a_3akd_2c_20true_20_3e',['ProjectOperand< GemmOperand::kD, true >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/classes_d.html b/docs/generated-html/search/classes_d.html new file mode 100644 index 00000000..652508df --- /dev/null +++ b/docs/generated-html/search/classes_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_d.js b/docs/generated-html/search/classes_d.js new file mode 100644 index 00000000..b7e543fb --- /dev/null +++ b/docs/generated-html/search/classes_d.js @@ -0,0 +1,12 @@ +var searchData= +[ + ['remove_5fconst',['remove_const',['../structcutlass_1_1platform_1_1remove__const.html',1,'cutlass::platform']]], + ['remove_5fconst_3c_20const_20t_20_3e',['remove_const< const T >',['../structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html',1,'cutlass::platform']]], + ['remove_5fcv',['remove_cv',['../structcutlass_1_1platform_1_1remove__cv.html',1,'cutlass::platform']]], + ['remove_5fvolatile',['remove_volatile',['../structcutlass_1_1platform_1_1remove__volatile.html',1,'cutlass::platform']]], + ['remove_5fvolatile_3c_20volatile_20t_20_3e',['remove_volatile< volatile T >',['../structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['reshapethreads',['ReshapeThreads',['../structcutlass_1_1gemm_1_1ReshapeThreads.html',1,'cutlass::gemm']]], + ['reshapethreads_3c_20tile_5f_2c_20threads_5f_2c_20true_20_3e',['ReshapeThreads< Tile_, Threads_, true >',['../structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html',1,'cutlass::gemm']]], + ['reshapetile',['ReshapeTile',['../structcutlass_1_1ReshapeTile.html',1,'cutlass']]], + ['reshapetile_3c_20tile_5f_2c_20kaccesssize_5f_2c_20true_20_3e',['ReshapeTile< Tile_, kAccessSize_, true >',['../structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_e.html b/docs/generated-html/search/classes_e.html new file mode 100644 index 00000000..7d4e9a56 --- /dev/null +++ b/docs/generated-html/search/classes_e.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_e.js b/docs/generated-html/search/classes_e.js new file mode 100644 index 00000000..82b74130 --- /dev/null +++ b/docs/generated-html/search/classes_e.js @@ -0,0 +1,34 @@ +var searchData= +[ + ['sgemmconfig',['SgemmConfig',['../structcutlass_1_1gemm_1_1SgemmConfig.html',1,'cutlass::gemm']]], + ['sgemmtraits',['SgemmTraits',['../structcutlass_1_1gemm_1_1SgemmTraits.html',1,'cutlass::gemm']]], + ['shape',['Shape',['../structcutlass_1_1Shape.html',1,'cutlass']]], + ['shapeadd',['ShapeAdd',['../structcutlass_1_1ShapeAdd.html',1,'cutlass']]], + ['shapecount',['ShapeCount',['../structcutlass_1_1ShapeCount.html',1,'cutlass']]], + ['shapediv',['ShapeDiv',['../structcutlass_1_1ShapeDiv.html',1,'cutlass']]], + ['shapemax',['ShapeMax',['../structcutlass_1_1ShapeMax.html',1,'cutlass']]], + ['shapemin',['ShapeMin',['../structcutlass_1_1ShapeMin.html',1,'cutlass']]], + ['shapemul',['ShapeMul',['../structcutlass_1_1ShapeMul.html',1,'cutlass']]], + ['shapescale',['ShapeScale',['../structcutlass_1_1ShapeScale.html',1,'cutlass']]], + ['shapestrides',['ShapeStrides',['../structcutlass_1_1ShapeStrides.html',1,'cutlass']]], + ['shapesub',['ShapeSub',['../structcutlass_1_1ShapeSub.html',1,'cutlass']]], + ['sharedloadstream',['SharedLoadStream',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html',1,'cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >']]], + ['sharedstorage',['SharedStorage',['../structcutlass_1_1gemm_1_1ClearAccumulators_1_1SharedStorage.html',1,'cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorage'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, 
GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage'],['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html',1,'cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage']]], + ['simplifiedgemmepiloguetraits',['SimplifiedGemmEpilogueTraits',['../structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraits',['SimplifiedGemmTraits',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraits_3c_20klayouta_5f_2c_20klayoutb_5f_2c_20gemmconfig_5f_2c_20gemmepilogue_3c_20gemmepiloguetraits_5f_20_3e_2c_20index_5f_20_3e',['SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, GemmEpilogue< GemmEpilogueTraits_ >, Index_ >',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraitshelper',['SimplifiedGemmTraitsHelper',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html',1,'cutlass::gemm']]], + ['sqrt_5fest',['sqrt_est',['../structcutlass_1_1sqrt__est.html',1,'cutlass']]], + ['storagetype',['StorageType',['../structcutlass_1_1StorageType.html',1,'cutlass']]], + ['storagetype_3c_201_20_3e',['StorageType< 1 >',['../structcutlass_1_1StorageType_3_011_01_4.html',1,'cutlass']]], + ['storagetype_3c_202_20_3e',['StorageType< 2 >',['../structcutlass_1_1StorageType_3_012_01_4.html',1,'cutlass']]], + ['storagetype_3c_204_20_3e',['StorageType< 4 >',['../structcutlass_1_1StorageType_3_014_01_4.html',1,'cutlass']]], + 
['store',['Store',['../structcutlass_1_1Store.html',1,'cutlass']]], + ['store_3c_20double_2c_202_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Store< double, 2, Memory_, true, 16 >',['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 16 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_204_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 4 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_208_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 8 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html',1,'cutlass']]], + ['streamsharedstorage',['StreamSharedStorage',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >'],['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage']]], + ['streamsharedstorage_3c_20globalloadstreama_2c_20sharedloadstreama_20_3e',['StreamSharedStorage< GlobalLoadStreamA, SharedLoadStreamA >',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits']]], + 
['streamsharedstorage_3c_20globalloadstreamb_2c_20sharedloadstreamb_20_3e',['StreamSharedStorage< GlobalLoadStreamB, SharedLoadStreamB >',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits']]] +]; diff --git a/docs/generated-html/search/classes_f.html b/docs/generated-html/search/classes_f.html new file mode 100644 index 00000000..fa6ed25e --- /dev/null +++ b/docs/generated-html/search/classes_f.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_f.js b/docs/generated-html/search/classes_f.js new file mode 100644 index 00000000..72998f53 --- /dev/null +++ b/docs/generated-html/search/classes_f.js @@ -0,0 +1,24 @@ +var searchData= +[ + ['tensorref',['TensorRef',['../classcutlass_1_1TensorRef.html',1,'cutlass']]], + ['tensorref_3c_20t_2c_204_20_3e',['TensorRef< T, 4 >',['../classcutlass_1_1TensorRef.html',1,'cutlass']]], + ['tensorview',['TensorView',['../classcutlass_1_1TensorView.html',1,'cutlass']]], + ['threadmultiplyadd',['ThreadMultiplyAdd',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html',1,'cutlass::gemm']]], + ['threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20threadsperwarp_5f_2c_20half_2c_20half_2c_20half_20_3e',['ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html',1,'cutlass::gemm']]], + ['threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20threadsperwarp_5f_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e',['ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html',1,'cutlass::gemm']]], + ['threadoffset',['ThreadOffset',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, 
Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset'],['../structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html',1,'cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset']]], + ['tiledthreadoffset',['TiledThreadOffset',['../structcutlass_1_1TiledThreadOffset.html',1,'cutlass']]], + 
['tileiteratorbase',['TileIteratorBase',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileiteratorbase_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20advance_5f_2c_20memoryspace_2c_20index_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20iteratorfragment_3a_3akscalar_2c_20shape_3c_200_2c_200_2c_200_2c_200_20_3e_20_3e',['TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileiteratorbase_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20iteratoradvance_3a_3akh_2c_20memoryspace_3a_3akglobal_2c_20index_5f_20_3e',['TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileloaditerator',['TileLoadIterator',['../structcutlass_1_1TileLoadIterator.html',1,'cutlass']]], + ['tileloaditerator_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20tiletraits_5f_3a_3amultiplicandtraits_3a_3akkstrided_20_3f_20iteratoradvance_3a_3akh_20_3aiteratoradvance_3a_3akw_2c_20memoryspace_3a_3akglobal_2c_20index_5f_20_3e',['TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? 
IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >',['../structcutlass_1_1TileLoadIterator.html',1,'cutlass']]], + ['tilestoreiterator',['TileStoreIterator',['../structcutlass_1_1TileStoreIterator.html',1,'cutlass']]], + ['tiletraits',['TileTraits',['../structcutlass_1_1TileTraits.html',1,'cutlass']]], + ['tiletraitscontiguousmajor',['TileTraitsContiguousMajor',['../structcutlass_1_1TileTraitsContiguousMajor.html',1,'cutlass']]], + ['tiletraitsstandard',['TileTraitsStandard',['../structcutlass_1_1TileTraitsStandard.html',1,'cutlass']]], + ['tiletraitsstridemajor',['TileTraitsStrideMajor',['../structcutlass_1_1TileTraitsStrideMajor.html',1,'cutlass']]], + ['tiletraitswarprake',['TileTraitsWarpRake',['../structcutlass_1_1TileTraitsWarpRake.html',1,'cutlass']]], + ['trivialiterator',['TrivialIterator',['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html',1,'cutlass::PredicateVector']]], + ['trivialpredicatetileadapter',['TrivialPredicateTileAdapter',['../structcutlass_1_1TrivialPredicateTileAdapter.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/close.png b/docs/generated-html/search/close.png new file mode 100644 index 00000000..9342d3df Binary files /dev/null and b/docs/generated-html/search/close.png differ diff --git a/docs/generated-html/search/defines_0.html b/docs/generated-html/search/defines_0.html new file mode 100644 index 00000000..3bffafa9 --- /dev/null +++ b/docs/generated-html/search/defines_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/defines_0.js b/docs/generated-html/search/defines_0.js new file mode 100644 index 00000000..84111a02 --- /dev/null +++ b/docs/generated-html/search/defines_0.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['_5f_5fnv_5fstd_5fmax',['__NV_STD_MAX',['../platform_8h.html#abd31f291635329bc15292954f1f01d38',1,'platform.h']]], + ['_5f_5fnv_5fstd_5fmin',['__NV_STD_MIN',['../platform_8h.html#a39e234a3e3b0018b58df720bcb143420',1,'platform.h']]], + ['_5f_5fplatform_5fcat',['__platform_cat',['../platform_8h.html#aece7fe71be5aaf8d12dc9e2372f97de4',1,'platform.h']]], + ['_5f_5fplatform_5fcat_5f',['__platform_cat_',['../platform_8h.html#acd148999a5caeba8f6fd52e7e288e659',1,'platform.h']]] +]; diff --git a/docs/generated-html/search/defines_1.html b/docs/generated-html/search/defines_1.html new file mode 100644 index 00000000..ca5bb94e --- /dev/null +++ b/docs/generated-html/search/defines_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/defines_1.js b/docs/generated-html/search/defines_1.js new file mode 100644 index 00000000..f59274bc --- /dev/null +++ b/docs/generated-html/search/defines_1.js @@ -0,0 +1,17 @@ +var searchData= +[ + ['constexpr',['constexpr',['../platform_8h.html#a72f0657181cca64b44eb186b707eb380',1,'platform.h']]], + ['cuda_5flog',['CUDA_LOG',['../debug_8h.html#a27e3466bcf1ec7fda4f6f95aa0a51177',1,'debug.h']]], + ['cuda_5flog_5fdebug',['CUDA_LOG_DEBUG',['../debug_8h.html#a8d6986db819719ada8b29d53dfc104a6',1,'debug.h']]], + ['cuda_5fperror',['CUDA_PERROR',['../debug_8h.html#aed8337b88d71895f95f8980ef0b3a50b',1,'debug.h']]], + ['cuda_5fperror_5fdebug',['CUDA_PERROR_DEBUG',['../debug_8h.html#a36436f5408940a47ac5cdfc9b31648db',1,'debug.h']]], + ['cuda_5fperror_5fexit',['CUDA_PERROR_EXIT',['../debug_8h.html#a002632ff687c83cff0484476be401f05',1,'debug.h']]], + ['cutlass_5fassert',['CUTLASS_ASSERT',['../cutlass_8h.html#a0159b8e4cd578881a1ccfd0921516af7',1,'cutlass.h']]], + ['cutlass_5fhost_5fdevice',['CUTLASS_HOST_DEVICE',['../cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1',1,'cutlass.h']]], + ['cutlass_5fmajor',['CUTLASS_MAJOR',['../cutlass_8h.html#a8ff3cda9323810c1c504793a0206d4b8',1,'cutlass.h']]], + ['cutlass_5fminor',['CUTLASS_MINOR',['../cutlass_8h.html#ad114a1ab01f73833ea00020ffb7bcea7',1,'cutlass.h']]], + ['cutlass_5fpatch',['CUTLASS_PATCH',['../cutlass_8h.html#a1d4e5818a594bbfc472e54978955cb8b',1,'cutlass.h']]], + ['cutlass_5fpragma_5fno_5funroll',['CUTLASS_PRAGMA_NO_UNROLL',['../cutlass_8h.html#adb3bc73d74b4a4bf13099d5696db3352',1,'cutlass.h']]], + ['cutlass_5fpragma_5funroll',['CUTLASS_PRAGMA_UNROLL',['../cutlass_8h.html#a4b1c9f25ab6eaa25e1f2258dd63e6ce4',1,'cutlass.h']]], + ['cutlass_5fversion',['CUTLASS_VERSION',['../cutlass_8h.html#aa3040eddf073214969f9445bfa925039',1,'cutlass.h']]] +]; diff --git a/docs/generated-html/search/defines_2.html b/docs/generated-html/search/defines_2.html new file mode 100644 index 
00000000..7cc1a74c --- /dev/null +++ b/docs/generated-html/search/defines_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/defines_2.js b/docs/generated-html/search/defines_2.js new file mode 100644 index 00000000..6b3f8ea5 --- /dev/null +++ b/docs/generated-html/search/defines_2.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['noexcept',['noexcept',['../platform_8h.html#a189faadd7f99f6c354db09acbb2aafcd',1,'platform.h']]], + ['nullptr',['nullptr',['../platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936',1,'platform.h']]] +]; diff --git a/docs/generated-html/search/defines_3.html b/docs/generated-html/search/defines_3.html new file mode 100644 index 00000000..3d0ac123 --- /dev/null +++ b/docs/generated-html/search/defines_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/defines_3.js b/docs/generated-html/search/defines_3.js new file mode 100644 index 00000000..72e85b85 --- /dev/null +++ b/docs/generated-html/search/defines_3.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['static_5fassert',['static_assert',['../platform_8h.html#adde4c9ea91b753491851361a4198c009',1,'platform.h']]] +]; diff --git a/docs/generated-html/search/enums_0.html b/docs/generated-html/search/enums_0.html new file mode 100644 index 00000000..9efcd1b7 --- /dev/null +++ b/docs/generated-html/search/enums_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/enums_0.js b/docs/generated-html/search/enums_0.js new file mode 100644 index 00000000..73c94c1f --- /dev/null +++ b/docs/generated-html/search/enums_0.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['kind',['Kind',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375',1,'cutlass::Identity::Kind()'],['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03c',1,'cutlass::MemorySpace::Kind()'],['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2b',1,'cutlass::MatrixLayout::Kind()'],['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0c',1,'cutlass::GemmOperand::Kind()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738dda',1,'cutlass::IteratorAdvance::Kind()'],['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80',1,'cutlass::IteratorFragment::Kind()']]] +]; diff --git a/docs/generated-html/search/enumvalues_0.html b/docs/generated-html/search/enumvalues_0.html new file mode 100644 index 00000000..03fdfad9 --- /dev/null +++ b/docs/generated-html/search/enumvalues_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/enumvalues_0.js b/docs/generated-html/search/enumvalues_0.js new file mode 100644 index 00000000..f5435725 --- /dev/null +++ b/docs/generated-html/search/enumvalues_0.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['additive',['Additive',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375a77d7cc80ec0c3ff42ca9b2aff98a1646',1,'cutlass::Identity']]] +]; diff --git a/docs/generated-html/search/enumvalues_1.html b/docs/generated-html/search/enumvalues_1.html new file mode 100644 index 00000000..abeea564 --- /dev/null +++ b/docs/generated-html/search/enumvalues_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/enumvalues_1.js b/docs/generated-html/search/enumvalues_1.js new file mode 100644 index 00000000..b06592d0 --- /dev/null +++ b/docs/generated-html/search/enumvalues_1.js @@ -0,0 +1,19 @@ +var searchData= +[ + ['ka',['kA',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0cac2b9fe9e3679a059d1a6c946b2a2c31a',1,'cutlass::GemmOperand']]], + ['kb',['kB',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0caad0876342d150cef7da6ae149d5e99f9',1,'cutlass::GemmOperand']]], + ['kc',['kC',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0ca7598e104da2001a76ec344f1c1b9c6dc',1,'cutlass::GemmOperand']]], + ['kcolumnmajor',['kColumnMajor',['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2bac15988acba79c11072d38b295f163a2b',1,'cutlass::MatrixLayout']]], + ['kd',['kD',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0ca49eef82461e44c96462f9c4dbaab71fe',1,'cutlass::GemmOperand::kD()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaa56ecb02f4ed3bd7ae4a9c971805ee8c5',1,'cutlass::IteratorAdvance::kD()']]], + ['kgeneric',['kGeneric',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03ca21a44c0b78017acea0d1ffe223e5ca38',1,'cutlass::MemorySpace']]], + ['kglobal',['kGlobal',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03cac4bd4070cc396d698beb7ca2e3bbff37',1,'cutlass::MemorySpace']]], + ['kh',['kH',['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaacfe756fca665eb1bbf389850915c1b81',1,'cutlass::IteratorAdvance']]], + ['klanes',['kLanes',['../unioncutlass_1_1Vector.html#a824f9ab976c8e7f035236af03e5ae839a605c5e987bc7b08d743f29a6524abb27',1,'cutlass::Vector::kLanes()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#aa70d2fd36f00b63d321c1f7b6d6c3024ad242b575673ca1bf9cf311e58a966392',1,'cutlass::Vector< half, kLanes_ >::kLanes()']]], + 
['krequiresloadfence',['kRequiresLoadFence',['../structcutlass_1_1TileLoadIterator.html#a1f3601c595f12e7083919ece9b1ec84eaee9d9d6cea8079c32c9383bde45161fc',1,'cutlass::TileLoadIterator']]], + ['krowmajor',['kRowMajor',['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2ba6a287c17f9f5bf53528ae68296beeedb',1,'cutlass::MatrixLayout']]], + ['kscalar',['kScalar',['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80aeca44a186befa21ccae44eb4dc7b6954',1,'cutlass::IteratorFragment']]], + ['kshared',['kShared',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03ca2804339b2be64ff68ae3042073aaa7cc',1,'cutlass::MemorySpace']]], + ['kvectorsize',['kVectorSize',['../unioncutlass_1_1Vector.html#abf0c16b6f9cb8439835ebdb271d58763afaf4b62c6bcafbf961c5570364a0316e',1,'cutlass::Vector::kVectorSize()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#adc4140a7e40be1e4f81c78a657c7ba73abfbb3cf98db2f8af7150efb91cac4e79',1,'cutlass::Vector< half, kLanes_ >::kVectorSize()']]], + ['kw',['kW',['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaa567e61af8a3401d302f3a3ab26418df0',1,'cutlass::IteratorAdvance']]], + ['kwmmamatrix',['kWmmaMatrix',['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80a21d2b2793bab0d348df40715b8f14419',1,'cutlass::IteratorFragment']]] +]; diff --git a/docs/generated-html/search/enumvalues_2.html b/docs/generated-html/search/enumvalues_2.html new file mode 100644 index 00000000..90289986 --- /dev/null +++ b/docs/generated-html/search/enumvalues_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/enumvalues_2.js b/docs/generated-html/search/enumvalues_2.js new file mode 100644 index 00000000..d96aad12 --- /dev/null +++ b/docs/generated-html/search/enumvalues_2.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['multiplicative',['Multiplicative',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375af0cc1d8a713958a86af1063595604597',1,'cutlass::Identity']]] +]; diff --git a/docs/generated-html/search/enumvalues_3.html b/docs/generated-html/search/enumvalues_3.html new file mode 100644 index 00000000..b152efcb --- /dev/null +++ b/docs/generated-html/search/enumvalues_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/enumvalues_3.js b/docs/generated-html/search/enumvalues_3.js new file mode 100644 index 00000000..254df8ef --- /dev/null +++ b/docs/generated-html/search/enumvalues_3.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['value',['value',['../structcutlass_1_1log2__down.html#a793565cd891559fab765455e847171dca23d1b50f2f02e1026d4b5dc7ebd6880d',1,'cutlass::log2_down::value()'],['../structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html#ad7d3c2329ab708bd4af36ffaee8509cba282c4c5d8f66dc49544f34071f148b1f',1,'cutlass::log2_down< N, 1, Count >::value()'],['../structcutlass_1_1log2__up.html#a5826002505544547d0c5cc311c2338e3a09591054a7c9b184769d579c56dd09d6',1,'cutlass::log2_up::value()'],['../structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html#ab001737f02df0a2c514334a1bfa6f1f9a6b6af5b6bf14ee5d3e3f1442e7f75117',1,'cutlass::log2_up< N, 1, Count >::value()'],['../structcutlass_1_1sqrt__est.html#abe44577e3d8f34fc07bb9ecf89b25b11a2e73d046302be2504f50c08d788e9964',1,'cutlass::sqrt_est::value()'],['../structcutlass_1_1divide__assert.html#a20e8b8a803c6b5cfe636724760442e33ab924a64662c2eb917b1dd4ca31fdd2dc',1,'cutlass::divide_assert::value()'],['../structcutlass_1_1platform_1_1alignment__of.html#aa1d40937d3536b68e90c580765821389aa36284864bc3d1f73d3bf73cd8da7c83',1,'cutlass::platform::alignment_of::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html#a6005c446eb41749276e0114b82abd990a5b0129d0f9bb45f1c56506efbbb22b6f',1,'cutlass::platform::alignment_of< int4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html#ac55e0c5a0bc4c95981744e55ee7580cea807729922944eede573430b20ad4b322',1,'cutlass::platform::alignment_of< uint4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html#ac9e709c32271b14b35c9607c64835a95a6a6ee3f24f4d123fc7c138fe5b776f2e',1,'cutlass::platform::alignment_of< float4 
>::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html#ad58512f76f0b9b000d48f1ff869a0547a3d020dd8ba5c735a60d7c2c897e158f5',1,'cutlass::platform::alignment_of< long4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html#adc0eec628649de183fe984bb46898830a8152a79c27d055dc3d0b8d662c0bc96a',1,'cutlass::platform::alignment_of< ulong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html#aadf6522691db02f1aab22c22716f0793a940fa73dc4f0a49b78e4e0cefaf4775d',1,'cutlass::platform::alignment_of< longlong2 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html#a511f088278b3de04feb55ab60bdc5a09a58b5cc7be52956c43c2966af5887db80',1,'cutlass::platform::alignment_of< ulonglong2 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html#a5fb114d264023728cca5364401bd6929a7b89d57c8009e094f69ff57e196d8318',1,'cutlass::platform::alignment_of< double2 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html#a666c4fd30155873e3499f5cdc11782daafc1a7c2bb5e6483d42d380a2b4fd9561',1,'cutlass::platform::alignment_of< longlong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html#a2568c1ab218cab6505bd20e3c2c420ffa54f6e1afec0ed30b18ab79fd6faf81b5',1,'cutlass::platform::alignment_of< ulonglong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html#a024eaf40a8f3e8bd38b416868e0c68bca5a60b16666306472e92ad1320473ba85',1,'cutlass::platform::alignment_of< double4 >::value()']]] +]; diff --git a/docs/generated-html/search/files_0.html b/docs/generated-html/search/files_0.html new file mode 100644 index 00000000..49606c82 --- /dev/null +++ b/docs/generated-html/search/files_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_0.js b/docs/generated-html/search/files_0.js new file mode 100644 index 00000000..0ba05c30 --- /dev/null +++ b/docs/generated-html/search/files_0.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['clear_5faccumulators_2eh',['clear_accumulators.h',['../clear__accumulators_8h.html',1,'']]], + ['convert_2eh',['convert.h',['../convert_8h.html',1,'']]], + ['coord_2eh',['coord.h',['../coord_8h.html',1,'']]], + ['core_5fio_2eh',['core_io.h',['../core__io_8h.html',1,'']]], + ['cutlass_2eh',['cutlass.h',['../cutlass_8h.html',1,'']]], + ['cutlass_5fmath_2eh',['cutlass_math.h',['../cutlass__math_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_1.html b/docs/generated-html/search/files_1.html new file mode 100644 index 00000000..c8871748 --- /dev/null +++ b/docs/generated-html/search/files_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_1.js b/docs/generated-html/search/files_1.js new file mode 100644 index 00000000..e0b1e0e0 --- /dev/null +++ b/docs/generated-html/search/files_1.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['debug_2eh',['debug.h',['../debug_8h.html',1,'']]], + ['dgemm_5ftraits_2eh',['dgemm_traits.h',['../dgemm__traits_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_2.html b/docs/generated-html/search/files_2.html new file mode 100644 index 00000000..99bdf21c --- /dev/null +++ b/docs/generated-html/search/files_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_2.js b/docs/generated-html/search/files_2.js new file mode 100644 index 00000000..a34177fe --- /dev/null +++ b/docs/generated-html/search/files_2.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['fragment_2eh',['fragment.h',['../fragment_8h.html',1,'']]], + ['fragment_5fload_5fstore_2eh',['fragment_load_store.h',['../fragment__load__store_8h.html',1,'']]], + ['fragment_5fmultiply_5fadd_2eh',['fragment_multiply_add.h',['../fragment__multiply__add_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_3.html b/docs/generated-html/search/files_3.html new file mode 100644 index 00000000..f8e543a8 --- /dev/null +++ b/docs/generated-html/search/files_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_3.js b/docs/generated-html/search/files_3.js new file mode 100644 index 00000000..0c2ade3e --- /dev/null +++ b/docs/generated-html/search/files_3.js @@ -0,0 +1,12 @@ +var searchData= +[ + ['gemm_2eh',['gemm.h',['../gemm_8h.html',1,'']]], + ['gemm_5fepilogue_2eh',['gemm_epilogue.h',['../gemm__epilogue_8h.html',1,'']]], + ['gemm_5fepilogue_5ftraits_2eh',['gemm_epilogue_traits.h',['../gemm__epilogue__traits_8h.html',1,'']]], + ['gemm_5fglobal_5fstream_2eh',['gemm_global_stream.h',['../gemm__global__stream_8h.html',1,'']]], + ['gemm_5fglobal_5ftile_2eh',['gemm_global_tile.h',['../gemm__global__tile_8h.html',1,'']]], + ['gemm_5foperand_2eh',['gemm_operand.h',['../gemm__operand_8h.html',1,'']]], + ['gemm_5fshared_5fstream_2eh',['gemm_shared_stream.h',['../gemm__shared__stream_8h.html',1,'']]], + ['gemm_5fshared_5ftile_2eh',['gemm_shared_tile.h',['../gemm__shared__tile_8h.html',1,'']]], + ['gemm_5ftraits_2eh',['gemm_traits.h',['../gemm__traits_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_4.html b/docs/generated-html/search/files_4.html new file mode 100644 index 00000000..2ebb46c7 --- /dev/null +++ b/docs/generated-html/search/files_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_4.js b/docs/generated-html/search/files_4.js new file mode 100644 index 00000000..991070fd --- /dev/null +++ b/docs/generated-html/search/files_4.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['hgemm_5fglobal_5ftile_2eh',['hgemm_global_tile.h',['../hgemm__global__tile_8h.html',1,'']]], + ['hgemm_5fmultiply_5fadd_2eh',['hgemm_multiply_add.h',['../hgemm__multiply__add_8h.html',1,'']]], + ['hgemm_5fswizzle_2eh',['hgemm_swizzle.h',['../hgemm__swizzle_8h.html',1,'']]], + ['hgemm_5ftraits_2eh',['hgemm_traits.h',['../hgemm__traits_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_5.html b/docs/generated-html/search/files_5.html new file mode 100644 index 00000000..268b7eb5 --- /dev/null +++ b/docs/generated-html/search/files_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_5.js b/docs/generated-html/search/files_5.js new file mode 100644 index 00000000..6e82d0eb --- /dev/null +++ b/docs/generated-html/search/files_5.js @@ -0,0 +1,10 @@ +var searchData= +[ + ['identity_5fblock_5fswizzle_2eh',['identity_block_swizzle.h',['../identity__block__swizzle_8h.html',1,'']]], + ['igemm_5fepilogue_2eh',['igemm_epilogue.h',['../igemm__epilogue_8h.html',1,'']]], + ['igemm_5fglobal_5ftile_2eh',['igemm_global_tile.h',['../igemm__global__tile_8h.html',1,'']]], + ['igemm_5fmultiply_5fadd_2eh',['igemm_multiply_add.h',['../igemm__multiply__add_8h.html',1,'']]], + ['igemm_5fswizzle_2eh',['igemm_swizzle.h',['../igemm__swizzle_8h.html',1,'']]], + ['igemm_5ftraits_2eh',['igemm_traits.h',['../igemm__traits_8h.html',1,'']]], + ['iterator_5faccess_2eh',['iterator_access.h',['../iterator__access_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_6.html b/docs/generated-html/search/files_6.html new file mode 100644 index 00000000..98fc6666 --- /dev/null +++ b/docs/generated-html/search/files_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_6.js b/docs/generated-html/search/files_6.js new file mode 100644 index 00000000..20c8aded --- /dev/null +++ b/docs/generated-html/search/files_6.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['linear_5fscaling_2eh',['linear_scaling.h',['../linear__scaling_8h.html',1,'']]], + ['load_5fstore_2eh',['load_store.h',['../load__store_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_7.html b/docs/generated-html/search/files_7.html new file mode 100644 index 00000000..49507ded --- /dev/null +++ b/docs/generated-html/search/files_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_7.js b/docs/generated-html/search/files_7.js new file mode 100644 index 00000000..c9a077bc --- /dev/null +++ b/docs/generated-html/search/files_7.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['matrix_5ftraits_2eh',['matrix_traits.h',['../matrix__traits_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_8.html b/docs/generated-html/search/files_8.html new file mode 100644 index 00000000..12c6630b --- /dev/null +++ b/docs/generated-html/search/files_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_8.js b/docs/generated-html/search/files_8.js new file mode 100644 index 00000000..b0cdc6b7 --- /dev/null +++ b/docs/generated-html/search/files_8.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['platform_2eh',['platform.h',['../platform_8h.html',1,'']]], + ['predicate_5fvector_2eh',['predicate_vector.h',['../predicate__vector_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_9.html b/docs/generated-html/search/files_9.html new file mode 100644 index 00000000..cabcae2f --- /dev/null +++ b/docs/generated-html/search/files_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_9.js b/docs/generated-html/search/files_9.js new file mode 100644 index 00000000..e3c39a10 --- /dev/null +++ b/docs/generated-html/search/files_9.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['reshape_5ftile_2eh',['reshape_tile.h',['../reshape__tile_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_a.html b/docs/generated-html/search/files_a.html new file mode 100644 index 00000000..f7402215 --- /dev/null +++ b/docs/generated-html/search/files_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_a.js b/docs/generated-html/search/files_a.js new file mode 100644 index 00000000..5ee47424 --- /dev/null +++ b/docs/generated-html/search/files_a.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['sgemm_5ftraits_2eh',['sgemm_traits.h',['../sgemm__traits_8h.html',1,'']]], + ['shape_2eh',['shape.h',['../shape_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_b.html b/docs/generated-html/search/files_b.html new file mode 100644 index 00000000..7be10030 --- /dev/null +++ b/docs/generated-html/search/files_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_b.js b/docs/generated-html/search/files_b.js new file mode 100644 index 00000000..f443994e --- /dev/null +++ b/docs/generated-html/search/files_b.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['tensor_5fref_2eh',['tensor_ref.h',['../tensor__ref_8h.html',1,'']]], + ['tensor_5fview_2eh',['tensor_view.h',['../tensor__view_8h.html',1,'']]], + ['thread_5fmultiply_5fadd_2eh',['thread_multiply_add.h',['../thread__multiply__add_8h.html',1,'']]], + ['tile_5fiterator_2eh',['tile_iterator.h',['../tile__iterator_8h.html',1,'']]], + ['tile_5ftraits_5fstandard_2eh',['tile_traits_standard.h',['../tile__traits__standard_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_c.html b/docs/generated-html/search/files_c.html new file mode 100644 index 00000000..c769b49b --- /dev/null +++ b/docs/generated-html/search/files_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_c.js b/docs/generated-html/search/files_c.js new file mode 100644 index 00000000..4edbbe39 --- /dev/null +++ b/docs/generated-html/search/files_c.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['vector_2eh',['vector.h',['../vector_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_d.html b/docs/generated-html/search/files_d.html new file mode 100644 index 00000000..54e39775 --- /dev/null +++ b/docs/generated-html/search/files_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_d.js b/docs/generated-html/search/files_d.js new file mode 100644 index 00000000..732797da --- /dev/null +++ b/docs/generated-html/search/files_d.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['wmma_5fgemm_5fepilogue_5ftraits_2eh',['wmma_gemm_epilogue_traits.h',['../wmma__gemm__epilogue__traits_8h.html',1,'']]], + ['wmma_5fgemm_5fglobal_5ftile_2eh',['wmma_gemm_global_tile.h',['../wmma__gemm__global__tile_8h.html',1,'']]], + ['wmma_5fgemm_5fmultiply_5fadd_2eh',['wmma_gemm_multiply_add.h',['../wmma__gemm__multiply__add_8h.html',1,'']]], + ['wmma_5fgemm_5fshared_5ftile_2eh',['wmma_gemm_shared_tile.h',['../wmma__gemm__shared__tile_8h.html',1,'']]], + ['wmma_5fgemm_5ftraits_2eh',['wmma_gemm_traits.h',['../wmma__gemm__traits_8h.html',1,'']]], + ['wmma_5fmatrix_2eh',['wmma_matrix.h',['../wmma__matrix_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_e.html b/docs/generated-html/search/files_e.html new file mode 100644 index 00000000..febb5494 --- /dev/null +++ b/docs/generated-html/search/files_e.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_e.js b/docs/generated-html/search/files_e.js new file mode 100644 index 00000000..732797da --- /dev/null +++ b/docs/generated-html/search/files_e.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['wmma_5fgemm_5fepilogue_5ftraits_2eh',['wmma_gemm_epilogue_traits.h',['../wmma__gemm__epilogue__traits_8h.html',1,'']]], + ['wmma_5fgemm_5fglobal_5ftile_2eh',['wmma_gemm_global_tile.h',['../wmma__gemm__global__tile_8h.html',1,'']]], + ['wmma_5fgemm_5fmultiply_5fadd_2eh',['wmma_gemm_multiply_add.h',['../wmma__gemm__multiply__add_8h.html',1,'']]], + ['wmma_5fgemm_5fshared_5ftile_2eh',['wmma_gemm_shared_tile.h',['../wmma__gemm__shared__tile_8h.html',1,'']]], + ['wmma_5fgemm_5ftraits_2eh',['wmma_gemm_traits.h',['../wmma__gemm__traits_8h.html',1,'']]], + ['wmma_5fmatrix_2eh',['wmma_matrix.h',['../wmma__matrix_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/functions_0.html b/docs/generated-html/search/functions_0.html new file mode 100644 index 00000000..0539c8ce --- /dev/null +++ b/docs/generated-html/search/functions_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_0.js b/docs/generated-html/search/functions_0.js new file mode 100644 index 00000000..9bb68b7c --- /dev/null +++ b/docs/generated-html/search/functions_0.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['_5f_5falign_5f_5f',['__align__',['../namespacecutlass_1_1platform.html#ac9068e2d027ffdf5cd564deecc2cb9e8',1,'cutlass::platform::__align__(1) aligned_chunk< 1 >'],['../namespacecutlass_1_1platform.html#a0bcb016704ec57f9499e662ba6156f98',1,'cutlass::platform::__align__(2) aligned_chunk< 2 >'],['../namespacecutlass_1_1platform.html#a71be5af25eeffa4077777f919e67d8da',1,'cutlass::platform::__align__(4) aligned_chunk< 4 >'],['../namespacecutlass_1_1platform.html#a42440254a16d4b6b95b95cc3360ee372',1,'cutlass::platform::__align__(8) aligned_chunk< 8 >'],['../namespacecutlass_1_1platform.html#a91d5e970d6ebe619914f40a9510bdb1e',1,'cutlass::platform::__align__(16) aligned_chunk< 16 >'],['../namespacecutlass_1_1platform.html#a210f4d360b1f9c3d074e71129fe4c0d9',1,'cutlass::platform::__align__(32) aligned_chunk< 32 >'],['../namespacecutlass_1_1platform.html#ae792b1c7ada1a33e306cd552f583bdce',1,'cutlass::platform::__align__(64) aligned_chunk< 64 >'],['../namespacecutlass_1_1platform.html#a5712ec4fed335a9b7f863fb3abe3c5eb',1,'cutlass::platform::__align__(128) aligned_chunk< 128 >'],['../namespacecutlass_1_1platform.html#a595cc98db29fb4d59772d2e2f52e347a',1,'cutlass::platform::__align__(256) aligned_chunk< 256 >'],['../namespacecutlass_1_1platform.html#ae70bb5d14a66500b47d2e3f83063d4a5',1,'cutlass::platform::__align__(512) aligned_chunk< 512 >'],['../namespacecutlass_1_1platform.html#a181e44e9c66f704175590727aaa9e5a1',1,'cutlass::platform::__align__(1024) aligned_chunk< 1024 >'],['../namespacecutlass_1_1platform.html#ae72c8fa997bb251d4140dceb03147154',1,'cutlass::platform::__align__(2048) aligned_chunk< 2048 >'],['../namespacecutlass_1_1platform.html#ada29683f1b408ae7b73cc8fbe2108628',1,'cutlass::platform::__align__(4096) 
aligned_chunk< 4096 >'],['../namespacecutlass.html#ae6ee3d9361526f859d737d9c68c13706',1,'cutlass::__align__(1) AlignedStruct< 1 >'],['../namespacecutlass.html#a602227fad962270da185209ecc6012f2',1,'cutlass::__align__(2) AlignedStruct< 2 >'],['../namespacecutlass.html#a266d7d2ae6e79537e46ee37b4fdface7',1,'cutlass::__align__(4) AlignedStruct< 4 >'],['../namespacecutlass.html#a1101e01215ddb0e5a7b120a4541a3c4e',1,'cutlass::__align__(8) AlignedStruct< 8 >'],['../namespacecutlass.html#aa4071cf5103f352a5100d9b4bba895e2',1,'cutlass::__align__(16) AlignedStruct< 16 >'],['../namespacecutlass.html#ada65694bdd4b70d4c9d769a536275a47',1,'cutlass::__align__(32) AlignedStruct< 32 >'],['../namespacecutlass.html#aa80a7cb3febd19b96f2ecbcb610b1b9e',1,'cutlass::__align__(64) AlignedStruct< 64 >']]] +]; diff --git a/docs/generated-html/search/functions_1.html b/docs/generated-html/search/functions_1.html new file mode 100644 index 00000000..4878b3d1 --- /dev/null +++ b/docs/generated-html/search/functions_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_1.js b/docs/generated-html/search/functions_1.js new file mode 100644 index 00000000..8b50e666 --- /dev/null +++ b/docs/generated-html/search/functions_1.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['advance',['advance',['../classcutlass_1_1TensorRef.html#aab0dafb81a462320e55e0dc4a5886478',1,'cutlass::TensorRef']]], + ['at',['at',['../structcutlass_1_1Coord.html#ad10b59430927a354fcd874d2d32f1bd8',1,'cutlass::Coord::at()'],['../structcutlass_1_1Coord.html#ab511a16210d1b94449f5bc6476f6a266',1,'cutlass::Coord::at(int dim)'],['../structcutlass_1_1Coord.html#af9cc7ab2088544d1240ac51c4c6e685d',1,'cutlass::Coord::at() const'],['../structcutlass_1_1Coord.html#aed4f4d1c7c0749fe72736d7a1213b6e9',1,'cutlass::Coord::at(int dim) const'],['../structcutlass_1_1FragmentIterator.html#a9cf31df06ff035705a1341810fcdcbf2',1,'cutlass::FragmentIterator::at(int d, int h, int w, int c=0) const'],['../structcutlass_1_1FragmentIterator.html#a7bdc407aae8d7360e089af347b585a53',1,'cutlass::FragmentIterator::at(int d, int h, int w, int c=0)'],['../structcutlass_1_1FragmentConstIterator.html#a8b957150545becacab1b8ead1be29424',1,'cutlass::FragmentConstIterator::at()'],['../structcutlass_1_1PredicateVector.html#ac8eca7087d1f7575b0c6beeb5f907bfd',1,'cutlass::PredicateVector::at()'],['../structcutlass_1_1TrivialPredicateTileAdapter.html#a3e41ab145489df08fca79251b2253d0f',1,'cutlass::TrivialPredicateTileAdapter::at()'],['../structcutlass_1_1PredicateTileAdapter.html#a7d54e877bca2e840c142293b4826e986',1,'cutlass::PredicateTileAdapter::at()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#a9e5651009a7b8df9960527c18c7b05dd',1,'cutlass::ConstPredicateTileAdapter::at()'],['../classcutlass_1_1TensorRef.html#a7eff42a37e4dbee488bfa726f3f0df4f',1,'cutlass::TensorRef::at(Coord< Rank > const &coord) const'],['../classcutlass_1_1TensorRef.html#a5702dea703104ab431c098c7b039c215',1,'cutlass::TensorRef::at(int idx) 
const'],['../classcutlass_1_1TensorView.html#ad894a8b373c413d308cb1b7c7ba545ce',1,'cutlass::TensorView::at(Coord_t const &coord) const'],['../classcutlass_1_1TensorView.html#acc55581896fae8c0449b44b56d750155',1,'cutlass::TensorView::at(Offset_t idx) const']]] +]; diff --git a/docs/generated-html/search/functions_10.html b/docs/generated-html/search/functions_10.html new file mode 100644 index 00000000..6f6fbae2 --- /dev/null +++ b/docs/generated-html/search/functions_10.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_10.js b/docs/generated-html/search/functions_10.js new file mode 100644 index 00000000..e7ab3ee1 --- /dev/null +++ b/docs/generated-html/search/functions_10.js @@ -0,0 +1,11 @@ +var searchData= +[ + ['tensorref',['TensorRef',['../classcutlass_1_1TensorRef.html#a54f6edc293b0b8ac97f02e8ab951c478',1,'cutlass::TensorRef::TensorRef()'],['../classcutlass_1_1TensorRef.html#ae48325312183ff61dbd312c64f31fcb8',1,'cutlass::TensorRef::TensorRef(Storage *ptr, Coord< Rank > stride)']]], + ['tensorview',['TensorView',['../classcutlass_1_1TensorView.html#a22401348796d603546e44d6c196018dc',1,'cutlass::TensorView::TensorView()'],['../classcutlass_1_1TensorView.html#a80480aa986a488a106a9b0aea331c317',1,'cutlass::TensorView::TensorView(TensorRef_t const &_ref, Coord_t const &_size)']]], + ['threadmultiplyadd',['ThreadMultiplyAdd',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#acec155117a56c942c5e695984b0f072d',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadMultiplyAdd()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a9b75e499f4c14369b5c86051dceeb81d',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadMultiplyAdd()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ab271a3f11ccde4b629ddb11b78c0d555',1,'cutlass::gemm::ThreadMultiplyAdd::ThreadMultiplyAdd()']]], + ['tileloaditerator',['TileLoadIterator',['../structcutlass_1_1TileLoadIterator.html#a81c9c0b17bf5f214230ecf10e0690a4e',1,'cutlass::TileLoadIterator::TileLoadIterator()'],['../structcutlass_1_1TileLoadIterator.html#a93e166575be3b2f7489833ae5da23f23',1,'cutlass::TileLoadIterator::TileLoadIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), 
ThreadOffset thread_offset_func=ThreadOffset())'],['../structcutlass_1_1TileLoadIterator.html#a53282fa4cb33cfcec79033d26e418af6',1,'cutlass::TileLoadIterator::TileLoadIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())']]], + ['tilestoreiterator',['TileStoreIterator',['../structcutlass_1_1TileStoreIterator.html#aac4d49854d63f632627b6974f9b59dbb',1,'cutlass::TileStoreIterator::TileStoreIterator()'],['../structcutlass_1_1TileStoreIterator.html#a037ccd942359e6bc8640a240b13cd330',1,'cutlass::TileStoreIterator::TileStoreIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())'],['../structcutlass_1_1TileStoreIterator.html#a4f89c5182659de94605300e15c3651b2',1,'cutlass::TileStoreIterator::TileStoreIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())']]], + ['transform',['transform',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a4dd95354137d3cb52752ecdd346a5685',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#aa9fe67c947bf461ba3e3ca48daa34815',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1Copy.html#ab356f0f473aa3fd8df8fb8ddd8e0e9f3',1,'cutlass::Copy::transform(Fragment_ const &src, Fragment_ &dst)'],['../structcutlass_1_1Copy.html#a171f9a44c05b6fb432b0339979de4eb2',1,'cutlass::Copy::transform(InputFragment_ const &src, int 
offset, Fragment_ &dst)'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ad467ce744bf9d478900fb2661d7a1c26',1,'cutlass::gemm::HgemmSwizzle::transform()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a91ad48362b99a5f96ac1e92e95104f7b',1,'cutlass::gemm::IgemmFloatToInt8Converter::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a819fd33db88a68521108bab2641d73fd',1,'cutlass::gemm::IgemmFloatToInt8Converter::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#aca8a61e8eb1ab33b9c61e2e7d342379d',1,'cutlass::gemm::IgemmInt8ToFloatConverter::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a89e078dbf376da872c3993ccbaf744d3',1,'cutlass::gemm::IgemmInt8ToFloatConverter::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a084917a512c7a411b76a69f86b906811',1,'cutlass::gemm::IgemmSwizzle::transform()']]], + ['trivialiterator',['TrivialIterator',['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a6cb3664b5cba4280b7055a65ddad7850',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#ada8cd3ac6db568bb9bf268ba2c3a3e14',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator(Iterator const &it)'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a3adf0440f9a0143a61b43d39c3f03721',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator(PredicateVector const &_vec)']]], + ['trivialpredicatetileadapter',['TrivialPredicateTileAdapter',['../structcutlass_1_1TrivialPredicateTileAdapter.html#a7259853a129a7e319b972d3b41dd59d7',1,'cutlass::TrivialPredicateTileAdapter']]] +]; diff --git a/docs/generated-html/search/functions_11.html b/docs/generated-html/search/functions_11.html new file 
mode 100644 index 00000000..dd88d8b7 --- /dev/null +++ b/docs/generated-html/search/functions_11.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_11.js b/docs/generated-html/search/functions_11.js new file mode 100644 index 00000000..a0eb54d4 --- /dev/null +++ b/docs/generated-html/search/functions_11.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['unique_5fptr',['unique_ptr',['../classcutlass_1_1platform_1_1unique__ptr.html#aa8a370bc7e4c2d99eb85e7fea27b3179',1,'cutlass::platform::unique_ptr::unique_ptr()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a14c8bf5a5deefe4a6602ccd5c5af364c',1,'cutlass::platform::unique_ptr::unique_ptr(pointer p)']]] +]; diff --git a/docs/generated-html/search/functions_12.html b/docs/generated-html/search/functions_12.html new file mode 100644 index 00000000..7093d19f --- /dev/null +++ b/docs/generated-html/search/functions_12.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_12.js b/docs/generated-html/search/functions_12.js new file mode 100644 index 00000000..972cb652 --- /dev/null +++ b/docs/generated-html/search/functions_12.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['valid',['valid',['../structcutlass_1_1FragmentIterator.html#ab18f8ea676b45831f939715212167a99',1,'cutlass::FragmentIterator::valid()'],['../structcutlass_1_1FragmentConstIterator.html#a01571b2fc566793fd50a10fa82441951',1,'cutlass::FragmentConstIterator::valid()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ac4d2c293f9312b673ea29bf79b2882fd',1,'cutlass::gemm::GemmGlobalIteratorAb::valid()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6594acc213fc8d4289c6c73631f60120',1,'cutlass::gemm::GemmGlobalIteratorCd::valid()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a468f8f503777e4a2b0089ee2bd6c471a',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::valid()'],['../structcutlass_1_1TileIteratorBase.html#af78a2bf3e7507dc7f50343a3c209f770',1,'cutlass::TileIteratorBase::valid()']]] +]; diff --git a/docs/generated-html/search/functions_13.html b/docs/generated-html/search/functions_13.html new file mode 100644 index 00000000..051a1eb8 --- /dev/null +++ b/docs/generated-html/search/functions_13.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_13.js b/docs/generated-html/search/functions_13.js new file mode 100644 index 00000000..f2593b4a --- /dev/null +++ b/docs/generated-html/search/functions_13.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['wmmagemmglobaliteratorcd',['WmmaGemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a505f124fa3f47c6d57b7275e81be6dd3',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::WmmaGemmGlobalIteratorCd()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa5c14e2a799249fe8bba14aa1dbe69dc',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::WmmaGemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int const pointer_offset=0, int const pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())']]] +]; diff --git a/docs/generated-html/search/functions_14.html b/docs/generated-html/search/functions_14.html new file mode 100644 index 00000000..d5fdbda4 --- /dev/null +++ b/docs/generated-html/search/functions_14.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_14.js b/docs/generated-html/search/functions_14.js new file mode 100644 index 00000000..10f55890 --- /dev/null +++ b/docs/generated-html/search/functions_14.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['_7eunique_5fptr',['~unique_ptr',['../classcutlass_1_1platform_1_1unique__ptr.html#a8902399dac4ab64f08f909f2ad9d4bcf',1,'cutlass::platform::unique_ptr']]] +]; diff --git a/docs/generated-html/search/functions_2.html b/docs/generated-html/search/functions_2.html new file mode 100644 index 00000000..67d2a392 --- /dev/null +++ b/docs/generated-html/search/functions_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_2.js b/docs/generated-html/search/functions_2.js new file mode 100644 index 00000000..93a72e5a --- /dev/null +++ b/docs/generated-html/search/functions_2.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['begin',['begin',['../structcutlass_1_1PredicateVector.html#a649045d8224514a4c28bcaf4b247b4a5',1,'cutlass::PredicateVector']]] +]; diff --git a/docs/generated-html/search/functions_3.html b/docs/generated-html/search/functions_3.html new file mode 100644 index 00000000..1f0eedb3 --- /dev/null +++ b/docs/generated-html/search/functions_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_3.js b/docs/generated-html/search/functions_3.js new file mode 100644 index 00000000..b9f86bba --- /dev/null +++ b/docs/generated-html/search/functions_3.js @@ -0,0 +1,19 @@ +var searchData= +[ + ['check',['check',['../structcutlass_1_1platform_1_1is__base__of__helper.html#a5bf08859497e304ca353699ad6ac332b',1,'cutlass::platform::is_base_of_helper::check(DerivedT *, T)'],['../structcutlass_1_1platform_1_1is__base__of__helper.html#ae8896817cabf297437b3a073e693ffd2',1,'cutlass::platform::is_base_of_helper::check(BaseT *, int)']]], + ['clamp',['clamp',['../structcutlass_1_1Coord.html#a482ada6da62f427987c22098796fcf7e',1,'cutlass::Coord']]], + ['clear',['clear',['../structcutlass_1_1Fragment.html#a29e7408fcde8cdf9de5e3a10eaa46391',1,'cutlass::Fragment::clear()'],['../structcutlass_1_1gemm_1_1ClearAccumulators.html#adb8026a19b09e9a581ec767c2c2da4ab',1,'cutlass::gemm::ClearAccumulators::clear()']]], + ['clearaccumulators',['ClearAccumulators',['../structcutlass_1_1gemm_1_1ClearAccumulators.html#a4ba07ea6d6fef961de1cb95b13c672ef',1,'cutlass::gemm::ClearAccumulators']]], + ['commit',['commit',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a6ce2c6e81d159d8e9ab736cb263f44ae',1,'cutlass::gemm::GlobalLoadStreamBase::commit()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a9cc435369c7fc76d0bb6233a8258e257',1,'cutlass::gemm::SharedLoadStream::commit()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a6dc512be014b9d849057e2fd4c0b0485',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::commit()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#ade2d85507dec77591e66276339a1eef5',1,'cutlass::gemm::GemmTraits::SharedLoadStream::commit()']]], + ['const_5fbegin',['const_begin',['../structcutlass_1_1PredicateVector.html#aeb7f9226a4fa49d06500c3c83958dc41',1,'cutlass::PredicateVector']]], + 
['const_5fend',['const_end',['../structcutlass_1_1PredicateVector.html#ab931610bc07ee0e87bb4d9a4d53a2321',1,'cutlass::PredicateVector']]], + ['const_5fref',['const_ref',['../classcutlass_1_1TensorView.html#a23564f1d333bb16343ed3a885f894285',1,'cutlass::TensorView']]], + ['constiterator',['ConstIterator',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a1216aab9c567ec0d4232019008ef3ea7',1,'cutlass::PredicateVector::ConstIterator::ConstIterator(ConstIterator const &it)'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a590e4f4533c87162c0b79e8d876a8fda',1,'cutlass::PredicateVector::ConstIterator::ConstIterator(PredicateVector const &_vec, int _start=0)']]], + ['constpredicatetileadapter',['ConstPredicateTileAdapter',['../structcutlass_1_1ConstPredicateTileAdapter.html#a9abd78d5c3e444bfb23d2b1a08be2be1',1,'cutlass::ConstPredicateTileAdapter']]], + ['contains',['contains',['../classcutlass_1_1TensorView.html#aa94063d9a9c6e599d3f53e22433274be',1,'cutlass::TensorView']]], + ['convert',['Convert',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a593a5a2c48708965e829d242ccb3b99f',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::Convert()'],['../classcutlass_1_1TensorRef.html#a7eb4444e2b3fce5a5ccde65a75df633c',1,'cutlass::TensorRef::convert()']]], + ['coord',['Coord',['../structcutlass_1_1Coord.html#a9cbfff91f0b0d0a149534c97e3d6e69b',1,'cutlass::Coord::Coord(int value=0)'],['../structcutlass_1_1Coord.html#a53a3d88a884f6cb7fda8aedfe2cec2c5',1,'cutlass::Coord::Coord(int _idx[])']]], + 
['copy',['Copy',['../structcutlass_1_1Copy.html#ab2c20f886208396a1779c6d29b56c3f1',1,'cutlass::Copy::Copy()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#af7a15b4456cda01c1ffbb2fdc532e87e',1,'cutlass::gemm::GlobalLoadStreamBase::copy()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a7f6bf3b8d70bcd74d84519decd9f0d8e',1,'cutlass::gemm::SharedLoadStream::copy(FetchedFragment &fetched)'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a279144e9722055d4b862e3fa25948762',1,'cutlass::gemm::SharedLoadStream::copy(int d, FetchedFragment &fetched)'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#ae033f55779b45b4228f40a4d699062bb',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::copy()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#af25495bb0bb35bd64246d3a80fe4806f',1,'cutlass::gemm::GemmTraits::SharedLoadStream::copy()']]], + ['count',['count',['../structcutlass_1_1Coord.html#a40429a9154f7a142ad7e9eb35282d196',1,'cutlass::Coord']]], + ['cuda_5fperror_5fimpl',['cuda_perror_impl',['../namespacecutlass.html#a6d3dfeb642a2ce3d5f52243fe48f89cc',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/functions_4.html b/docs/generated-html/search/functions_4.html new file mode 100644 index 00000000..c5bf87a4 --- /dev/null +++ b/docs/generated-html/search/functions_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_4.js b/docs/generated-html/search/functions_4.js new file mode 100644 index 00000000..7288b405 --- /dev/null +++ b/docs/generated-html/search/functions_4.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['data',['data',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a3af66b82b1a0cc5bf6141f940553e048',1,'cutlass::gemm::GemmGlobalIteratorAb::data()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a0d3c1a58f23957f9850d1b22992a981a',1,'cutlass::gemm::GemmGlobalIteratorCd::data()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6fd4e62eb280a5b8c17eb79141414581',1,'cutlass::gemm::GemmGlobalIteratorCd::data() const'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#afe77778a126449e210c0bd6ec2dc6709',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::data()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a90e9886534ecbbce69f57b4030d0903f',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::data() const'],['../classcutlass_1_1TensorRef.html#a8e23c78658f45c6f197a1774cc85c5b7',1,'cutlass::TensorRef::data()'],['../classcutlass_1_1TensorView.html#a248e4240ccf96c976254464710a73fc8',1,'cutlass::TensorView::data()'],['../structcutlass_1_1TileLoadIterator.html#afb6320b600f1f561594a9fb543b954e4',1,'cutlass::TileLoadIterator::data()'],['../structcutlass_1_1TileStoreIterator.html#a5ebab59862d5f50ad980871515d999b0',1,'cutlass::TileStoreIterator::data()']]], + ['dot',['dot',['../structcutlass_1_1Coord.html#ad4b3704d14057c043f972827671115cf',1,'cutlass::Coord::dot(Coord const &b, T sum) const'],['../structcutlass_1_1Coord.html#ae023c0c664c22a978e9b9ce5e063aae4',1,'cutlass::Coord::dot(Coord const &b) const']]] +]; diff --git a/docs/generated-html/search/functions_5.html b/docs/generated-html/search/functions_5.html new file mode 100644 index 00000000..a34446ce --- /dev/null +++ b/docs/generated-html/search/functions_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_5.js b/docs/generated-html/search/functions_5.js new file mode 100644 index 00000000..64953e70 --- /dev/null +++ b/docs/generated-html/search/functions_5.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['end',['end',['../structcutlass_1_1PredicateVector.html#ad9493fc80fdc33330cc15641779cc275',1,'cutlass::PredicateVector']]], + ['epilogue',['epilogue',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ae1983e37454ed14272b23b964614c54c',1,'cutlass::gemm::GemmEpilogue']]], + ['epilogue_5fwith_5for_5fwithout_5fbeta',['epilogue_with_or_without_beta',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a0c24dce365565f75e7edc1de1cb50ea4',1,'cutlass::gemm::GemmEpilogue']]], + ['evaluate',['evaluate',['../structcutlass_1_1gemm_1_1LinearScaling.html#a2e0d140aed388d2457dfb24d28fcd08a',1,'cutlass::gemm::LinearScaling::evaluate(Fragment_ const &accum, Fragment_ &output)'],['../structcutlass_1_1gemm_1_1LinearScaling.html#a47a53e5b67b2207fb3ba38a8b9cef448',1,'cutlass::gemm::LinearScaling::evaluate(Fragment_ const &accum, Fragment_ const &old, Fragment_ &output)']]] +]; diff --git a/docs/generated-html/search/functions_6.html b/docs/generated-html/search/functions_6.html new file mode 100644 index 00000000..6fd4b1f3 --- /dev/null +++ b/docs/generated-html/search/functions_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_6.js b/docs/generated-html/search/functions_6.js new file mode 100644 index 00000000..3d4faf67 --- /dev/null +++ b/docs/generated-html/search/functions_6.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['fill',['fill',['../structcutlass_1_1PredicateVector.html#a236bd1a822479750a809452fd58dd917',1,'cutlass::PredicateVector']]], + ['fragment_5fa',['fragment_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a4a8c64d85aa012e3689dd024c486924b',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fragment_5fb',['fragment_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#aa28f34fb0c4bf739246d92c2fef80e0b',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fragmentconstiterator',['FragmentConstIterator',['../structcutlass_1_1FragmentConstIterator.html#ac4b6f351e6e72bed37e425f02a10c81e',1,'cutlass::FragmentConstIterator::FragmentConstIterator(OtherFragment_ &fragment, int offset=0)'],['../structcutlass_1_1FragmentConstIterator.html#a3a8fd8f13c157ed13dc93fd78036c59e',1,'cutlass::FragmentConstIterator::FragmentConstIterator(FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)']]], + ['fragmentiterator',['FragmentIterator',['../structcutlass_1_1FragmentIterator.html#ae1825fe3e138e2aa62d27dab2b5227b4',1,'cutlass::FragmentIterator']]], + ['fragmentmultiplyadd',['FragmentMultiplyAdd',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#af19e14a22aefd1124f7d31beec6f8c42',1,'cutlass::gemm::FragmentMultiplyAdd::FragmentMultiplyAdd()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a21f0965f6178917c7f5c6d79ed048059',1,'cutlass::gemm::FragmentMultiplyAdd< half >::FragmentMultiplyAdd()']]] +]; diff --git a/docs/generated-html/search/functions_7.html b/docs/generated-html/search/functions_7.html new file mode 100644 index 00000000..6e09abf1 --- /dev/null +++ b/docs/generated-html/search/functions_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_7.js b/docs/generated-html/search/functions_7.js new file mode 100644 index 00000000..3ce9c5d8 --- /dev/null +++ b/docs/generated-html/search/functions_7.js @@ -0,0 +1,17 @@ +var searchData= +[ + ['gcd',['gcd',['../namespacecutlass.html#a38481ebfe13bc199aa621ceecfa016b8',1,'cutlass']]], + ['gemm',['Gemm',['../structcutlass_1_1gemm_1_1Gemm.html#a8bff0bd32aec05f8c1e282024be0bcfd',1,'cutlass::gemm::Gemm']]], + ['gemm_5fkernel',['gemm_kernel',['../namespacecutlass_1_1gemm.html#ad9577c9086b0f7fd1202d7f8109e4439',1,'cutlass::gemm']]], + ['gemmepilogue',['GemmEpilogue',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ab10147070c3a38fca75397f55dc51925',1,'cutlass::gemm::GemmEpilogue']]], + ['gemmglobaliteratorab',['GemmGlobalIteratorAb',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a34cb153d311377388e7819296a84d07e',1,'cutlass::gemm::GemmGlobalIteratorAb']]], + ['gemmglobaliteratorcd',['GemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6dae81995ab94c0b7f28eeeeb84a6c8d',1,'cutlass::gemm::GemmGlobalIteratorCd::GemmGlobalIteratorCd()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a64f1df43acb37a1901f0b55becaa9557',1,'cutlass::gemm::GemmGlobalIteratorCd::GemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int offset=0, int pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())']]], + ['get',['get',['../classcutlass_1_1PredicateVector_1_1Iterator.html#af035589126434bd2dbef4000cd864b8b',1,'cutlass::PredicateVector::Iterator::get()'],['../structcutlass_1_1ComputeOffsetFromShape.html#a3c6f60a59178ffb84899aa449bd51d38',1,'cutlass::ComputeOffsetFromShape::get()'],['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html#a5198e838e3892245fe7b10884555ec93',1,'cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > 
>::get()'],['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html#a11bf40abc57580db5ce4b0fd4c3e55ff',1,'cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >::get()'],['../structcutlass_1_1ComputeOffsetFromStrides.html#af5e46bc2b325cb6952d2d68c8aca1409',1,'cutlass::ComputeOffsetFromStrides::get()'],['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html#acdbb9c7cdf9fc054656614f72396434e',1,'cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >::get()'],['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html#a512a9d46f6bea9d85641d7263bcfee36',1,'cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides.html#a1744bfe277cbe0c642cce4a48c1dd9ad',1,'cutlass::ComputeThreadOffsetFromStrides::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html#a5d446b2663c01362361e09435a726996',1,'cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html#a6e621f5fae2ba29277fde46be1cede24',1,'cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >::get()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a2e7c14b8a118f81c1df46ea5045e297b',1,'cutlass::platform::unique_ptr::get()']]], + ['get_5fcoord_5fdhw',['get_Coord_dhw',['../namespacecutlass.html#a4680709eeeb679ef0219938f85f7394e',1,'cutlass']]], + ['get_5fcoord_5fhw',['get_Coord_hw',['../namespacecutlass.html#a7d2ab683e29b47d245e183ad5aeb962e',1,'cutlass::get_Coord_hw(Coord< 3 > const 
&coord)'],['../namespacecutlass.html#a082e7a2e4acc2879468243f5732ccf0b',1,'cutlass::get_Coord_hw(Coord< 4 > const &coord)']]], + ['get_5fcoord_5fhwc',['get_Coord_hwc',['../namespacecutlass.html#a71f3e2a12b9e98be1fba082610fa9d4f',1,'cutlass']]], + ['get_5fdeleter',['get_deleter',['../classcutlass_1_1platform_1_1unique__ptr.html#a5b8d8ecafb4da336acd50e40cd42b6e0',1,'cutlass::platform::unique_ptr::get_deleter() noexcept'],['../classcutlass_1_1platform_1_1unique__ptr.html#aa427ab4ea4f2336ac6db28d53a4c11ac',1,'cutlass::platform::unique_ptr::get_deleter() const noexcept']]], + ['globalloadstream',['GlobalLoadStream',['../structcutlass_1_1gemm_1_1GlobalLoadStream.html#a4dd11a75375b6b9d7b8dcbd4d402d8d6',1,'cutlass::gemm::GlobalLoadStream::GlobalLoadStream()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#ab2961b4db0694cf128d55d38a98db575',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::GlobalLoadStream()']]], + ['globalloadstreambase',['GlobalLoadStreamBase',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0fdc0f56d1352b5ad41fd4985edd3278',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['good',['good',['../classcutlass_1_1TensorRef.html#a0c049e523ee0fc98769ed8cd2d026780',1,'cutlass::TensorRef::good()'],['../classcutlass_1_1TensorView.html#a837881bc82704491accf54aad2b9def9',1,'cutlass::TensorView::good()']]] +]; diff --git a/docs/generated-html/search/functions_8.html b/docs/generated-html/search/functions_8.html new file mode 100644 index 00000000..d59ea971 --- /dev/null +++ b/docs/generated-html/search/functions_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_8.js b/docs/generated-html/search/functions_8.js new file mode 100644 index 00000000..9418317c --- /dev/null +++ b/docs/generated-html/search/functions_8.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['hgemmswizzle',['HgemmSwizzle',['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ac3c52e0fee9b37a3dfc39ca168a63d36',1,'cutlass::gemm::HgemmSwizzle']]] +]; diff --git a/docs/generated-html/search/functions_9.html b/docs/generated-html/search/functions_9.html new file mode 100644 index 00000000..5ccec429 --- /dev/null +++ b/docs/generated-html/search/functions_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_9.js b/docs/generated-html/search/functions_9.js new file mode 100644 index 00000000..04276e19 --- /dev/null +++ b/docs/generated-html/search/functions_9.js @@ -0,0 +1,22 @@ +var searchData= +[ + ['identityblockswizzle',['IdentityBlockSwizzle',['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html#abfde9b316173b1c0b8622cf22ffb6d68',1,'cutlass::gemm::IdentityBlockSwizzle']]], + ['igemmepilogue',['IgemmEpilogue',['../structcutlass_1_1gemm_1_1IgemmEpilogue.html#ab7a51121d24250d6441ee538e6521dc2',1,'cutlass::gemm::IgemmEpilogue::IgemmEpilogue()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html#a49ac00bed1532707aacd3ff108c84623',1,'cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >::IgemmEpilogue()']]], + ['igemmfloattoint8converter',['IgemmFloatToInt8Converter',['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#ac65f020e93584b1bd3cdb849ff625026',1,'cutlass::gemm::IgemmFloatToInt8Converter']]], + ['igemmint8tofloatconverter',['IgemmInt8ToFloatConverter',['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a88a55a494d3a30d50477d50bf6a8804d',1,'cutlass::gemm::IgemmInt8ToFloatConverter']]], + ['igemmswizzle',['IgemmSwizzle',['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#ac041d287c966cf568599d7e462e81d5a',1,'cutlass::gemm::IgemmSwizzle']]], + 
['inc_5fadvance',['inc_advance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a9dea455aa86bb59517b4a4d0309e424b',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_advance()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ab4b8150f19c9f8649d75c69ec0a76e1a',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a174ae7d8aa0664eaf1d6f63c5606baa0',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_advance()'],['../structcutlass_1_1TileLoadIterator.html#a91e13a7aad4b0acac002b6dd125abc37',1,'cutlass::TileLoadIterator::inc_advance()'],['../structcutlass_1_1TileStoreIterator.html#a1614b27755cf82c0e1f3e7852c5a4c75',1,'cutlass::TileStoreIterator::inc_advance()']]], + ['inc_5fc',['inc_c',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a12ead84ea9634e963d10c6df7b7792c9',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_c()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a44287250bf5631a490b514859fd101d1',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_c()']]], + ['inc_5fd',['inc_d',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a1e42503e5a54cdc01308e9030aebdd35',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_d()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ad26ab8d8010c9a1d7f3b91f60940b460',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_d()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab1ebbe54e4315ac07daf260a88f41d04',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_d()'],['../structcutlass_1_1TileLoadIterator.html#a0a93f37fd366a48c4ed6cc39aa850eb5',1,'cutlass::TileLoadIterator::inc_d()'],['../structcutlass_1_1TileStoreIterator.html#a74dffe1ddcc84935ab170117e939b7e3',1,'cutlass::TileStoreIterator::inc_d()']]], + 
['inc_5fh',['inc_h',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#aa24336597f4a3316d94df6ab0c20f714',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_h()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ae07fa10a53d44471a04275145201299e',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa9a733f35e9be67663c9c8f80b0034d4',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_h()'],['../structcutlass_1_1TileLoadIterator.html#a228a95cf2c9c6089287984fcbf5cface',1,'cutlass::TileLoadIterator::inc_h()'],['../structcutlass_1_1TileStoreIterator.html#a3793f5d5846862f22f1de736e36ae7c1',1,'cutlass::TileStoreIterator::inc_h()']]], + ['inc_5fstage',['inc_stage',['../structcutlass_1_1gemm_1_1SharedLoadStream.html#acf22fd09aa537943c16b900d66f1ec6f',1,'cutlass::gemm::SharedLoadStream::inc_stage()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a8851150a49e4a9c135279c8c9dfdc592',1,'cutlass::gemm::GemmTraits::SharedLoadStream::inc_stage()'],['../structcutlass_1_1TileLoadIterator.html#aeb3faf5e8f976f5a4d158ceb41a1cc64',1,'cutlass::TileLoadIterator::inc_stage()'],['../structcutlass_1_1TileStoreIterator.html#a187e0852ec4862f6d3cb6249bedc3bb3',1,'cutlass::TileStoreIterator::inc_stage()']]], + ['inc_5fw',['inc_w',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a622a4dd27162854ec96efea93cdd4380',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_w()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aec2d692967d9be5d42673dfde21f5427',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_w()'],['../structcutlass_1_1TileLoadIterator.html#a49cf3ee608debebf451cdd8c2125d073',1,'cutlass::TileLoadIterator::inc_w()'],['../structcutlass_1_1TileStoreIterator.html#aa573a47a9ffc3e07239a09e2bc470cf1',1,'cutlass::TileStoreIterator::inc_w()']]], + 
['initialize',['initialize',['../structcutlass_1_1gemm_1_1Gemm_1_1Params.html#ac00c9d78a187d9c7d53399f971c0e129',1,'cutlass::gemm::Gemm::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a3e9d0fd2989fea776b0cab0e0f2813ce',1,'cutlass::gemm::GemmEpilogueTraits::Params::initialize()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a7c7e448384156c801ed362359a1a6a40',1,'cutlass::gemm::GlobalLoadStreamBase::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html#a73091e07b6d4c99f6e0319fbf6bd1709',1,'cutlass::gemm::GemmGlobalIteratorAb::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#af5a496f1b6a46ea6a9894512029add6a',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::initialize()'],['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html#adb66103b905b35a1594c6f0bab65758a',1,'cutlass::gemm::SharedLoadStream::Params::initialize()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a8e4d277325bb5e56c718a2298b60d3cf',1,'cutlass::gemm::SharedLoadStream::initialize()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a40023f0ffdd8bee4ccbcaac28222e983',1,'cutlass::gemm::GemmTraits::Params::initialize()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a4946e45e10661307f562b27bad5cb72d',1,'cutlass::gemm::LinearScaling::Params::initialize()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#ad6b65c5f3ed7cd9e7ffeb684cbf30d04',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::initialize()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#ad2631ffcc963638aa5b016c66a2e2c55',1,'cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a3ba93370bd4b2ede4bd4eb97ac0881be',1,'cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, 
Index _stride_w)'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#af496afebb8983e5d346c681334955224',1,'cutlass::TileIteratorBase::Params::initialize()'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aeeea0f8bdee876553a4908b9b7cbaf76',1,'cutlass::TileLoadIterator::Params::initialize(SharedStorage const &storage)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#afd9e82df76ad35fe883b7834457242b2',1,'cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aa3922946bb0da0c0040dec44aa389ec1',1,'cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aebaecd0f971245ffc5a50fe5f7a9b4e8',1,'cutlass::TileLoadIterator::Params::initialize()'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#a71f5238a712f7b2f377fb58938ac829b',1,'cutlass::TileStoreIterator::Params::initialize(SharedStorage &storage)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#af0d26a2df2a1a5ba3c3169b736bd5d43',1,'cutlass::TileStoreIterator::Params::initialize(Scalar *ptr, Index stride_d, Index stride_h, Index stride_w)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#ac1cfe92f1543ba445fa10f1859a0db98',1,'cutlass::TileStoreIterator::Params::initialize(Scalar *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#af884f720d36aa82e7f972932686ae986',1,'cutlass::TileStoreIterator::Params::initialize()']]], + 
['initialize_5fpredicates',['initialize_predicates',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ab9375d9e779dcda79a5cd561bb3762ff',1,'cutlass::gemm::GemmGlobalIteratorAb::initialize_predicates()'],['../structcutlass_1_1TileIteratorBase.html#a78b6c0d6a1a96dd55a34bc302ecb07d7',1,'cutlass::TileIteratorBase::initialize_predicates()'],['../structcutlass_1_1TileLoadIterator.html#a8291a51bf96f86bc77d0e3453345dbd5',1,'cutlass::TileLoadIterator::initialize_predicates()'],['../structcutlass_1_1TileStoreIterator.html#af92ba20db048a9ec96976a1673f0f7c2',1,'cutlass::TileStoreIterator::initialize_predicates()']]], + ['is_5fzero',['is_zero',['../structcutlass_1_1PredicateVector.html#a1c4fe2bec906cd7937428ed6561ac79a',1,'cutlass::PredicateVector::is_zero()'],['../namespacecutlass_1_1gemm.html#a3e30ae89e6f7501725028144cd2d88cb',1,'cutlass::gemm::is_zero(T x)'],['../namespacecutlass_1_1gemm.html#a4a12fcfae60f26efa47bf0a79483d8ac',1,'cutlass::gemm::is_zero(half x)']]], + ['iterator',['Iterator',['../classcutlass_1_1PredicateVector_1_1Iterator.html#a91b7d25cbd64e696ef23c87671f0b077',1,'cutlass::PredicateVector::Iterator::Iterator(Iterator const &it)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a83c2f584bd061f0b9b6b2a6cddf5b038',1,'cutlass::PredicateVector::Iterator::Iterator(PredicateVector &_vec, int _start=0)']]], + ['iterator_5fload',['iterator_load',['../namespacecutlass.html#a45dd7add04736cb5c3e69991d2f210be',1,'cutlass::iterator_load(InputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#a50f08aa93d7fe6825599d17e3c977031',1,'cutlass::iterator_load(InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#aca491136bdb966638a7ae57c47f86d1e',1,'cutlass::iterator_load(InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index 
offset=0)'],['../namespacecutlass.html#af25d56f7391322d9a3b9aa3c507f90dc',1,'cutlass::iterator_load(InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)']]], + ['iterator_5fload_5fpost_5fincrement',['iterator_load_post_increment',['../namespacecutlass.html#a3965068d8a4fdfe5e05782930fb4fe6b',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#af5abe551df7461eab66aa43907063d6b',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0)'],['../namespacecutlass.html#afb8e7a4e611e8b5ae7ca19d02f791d37',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)']]], + ['iterator_5fstore',['iterator_store',['../namespacecutlass.html#a0cb5bdf7bef498705c51a9cdcbef71f9',1,'cutlass::iterator_store(OutputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#a88dce4b124a294cc123f7cf5fd2d6472',1,'cutlass::iterator_store(OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#a410ed4d45ccafc2db842967740b6211f',1,'cutlass::iterator_store(OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)'],['../namespacecutlass.html#ad804b804ac19360b293046f9cbfd8dd5',1,'cutlass::iterator_store(OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)']]], + ['iterator_5fstore_5fpost_5fincrement',['iterator_store_post_increment',['../namespacecutlass.html#a5bf15cbf4cf4649d895fcbc2edf6a2de',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter 
predicate_adapter)'],['../namespacecutlass.html#ab8efb0edefca7a59acc5a14b7311130c',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)'],['../namespacecutlass.html#a96fdb65e922f6a3d46aa5de9ea78d460',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)']]] +]; diff --git a/docs/generated-html/search/functions_a.html b/docs/generated-html/search/functions_a.html new file mode 100644 index 00000000..3958eb7b --- /dev/null +++ b/docs/generated-html/search/functions_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_a.js b/docs/generated-html/search/functions_a.js new file mode 100644 index 00000000..61f3d1a7 --- /dev/null +++ b/docs/generated-html/search/functions_a.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['launch',['launch',['../structcutlass_1_1gemm_1_1Gemm.html#a77ae137aec79b4061a9ffa09aabf641c',1,'cutlass::gemm::Gemm::launch(Params const &params, cudaStream_t stream=cudaStreamDefault)'],['../structcutlass_1_1gemm_1_1Gemm.html#a4f4122a2ae8b9b09a9660e5c2ca9e906',1,'cutlass::gemm::Gemm::launch(CUfunction kernel, Params const &params, CUstream stream=CU_STREAM_LEGACY)']]], + ['lcm',['lcm',['../namespacecutlass.html#af07506fee11de882d926f4e8237eef09',1,'cutlass']]], + ['leading_5fdim',['leading_dim',['../classcutlass_1_1TensorRef.html#a8e1c61910ffb49ec64930f66dd342b77',1,'cutlass::TensorRef']]], + ['linearscaling',['LinearScaling',['../structcutlass_1_1gemm_1_1LinearScaling.html#a34df6970f033b3090ad8f4d40063b1b2',1,'cutlass::gemm::LinearScaling']]], + ['load',['load',['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html#a01a847858cb330d7d109ddee228e96ce',1,'cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html#a014682b143bce65667075ea15fad184d',1,'cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load()'],['../structcutlass_1_1Load.html#ad033ebc1452d96b18913333bf7068140',1,'cutlass::Load::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#aa9d5e227ea20ad3c6952f296016ec167',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 
>::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a0e58d26dd68aabb6cb9678f5656c7e6f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::load()'],['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#a7ba77016bee8e941f7831cc9fbfa994d',1,'cutlass::Load< double, 2, Memory_, true, 16 >::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#a4ee00178c441bdf4d4a1f8cf984bc03f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::load()'],['../structcutlass_1_1TileLoadIterator.html#a9c4b332857f419e6f789a93404dc2140',1,'cutlass::TileLoadIterator::load(Fragment &fragment, PredicateIterator pred_it) const'],['../structcutlass_1_1TileLoadIterator.html#a1058cdec33393db9c16b28c21d8957db',1,'cutlass::TileLoadIterator::load(Fragment &fragment) const']]], + ['load_5fpost_5fincrement',['load_post_increment',['../structcutlass_1_1TileLoadIterator.html#a2716b9010d2902b90e63abb0531ee915',1,'cutlass::TileLoadIterator::load_post_increment(Fragment &fragment, PredicateIterator pred_it)'],['../structcutlass_1_1TileLoadIterator.html#a195993d58ae0eeb53203116ac02ab38d',1,'cutlass::TileLoadIterator::load_post_increment(Fragment &fragment)']]] +]; diff --git a/docs/generated-html/search/functions_b.html b/docs/generated-html/search/functions_b.html new file mode 100644 index 00000000..b99b702d --- /dev/null +++ b/docs/generated-html/search/functions_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_b.js b/docs/generated-html/search/functions_b.js new file mode 100644 index 00000000..7b4b2eb9 --- /dev/null +++ b/docs/generated-html/search/functions_b.js @@ -0,0 +1,10 @@ +var searchData= +[ + ['make_5fcoord',['make_Coord',['../namespacecutlass.html#a7419519fa453a121dfa5f26bf87318d9',1,'cutlass::make_Coord(int _0)'],['../namespacecutlass.html#a61d81e5363bcb8a7f6dd70f053242564',1,'cutlass::make_Coord(int _0, int _1)'],['../namespacecutlass.html#a25acf680a7d2592c957a7ac603f4c361',1,'cutlass::make_Coord(int _0, int _1, int _2)'],['../namespacecutlass.html#a9410b1f5956d3aaf4584e65d047428fc',1,'cutlass::make_Coord(int _0, int _1, int _2, int _3)']]], + ['make_5fpair',['make_pair',['../namespacecutlass_1_1platform.html#a90ce74c7faa4e27c888ce56e957b73d5',1,'cutlass::platform']]], + ['make_5fzero',['make_zero',['../namespacecutlass.html#acdb62db582cf90cfd437fc56f4ca7bbf',1,'cutlass::make_zero(Scalar_ &x)'],['../namespacecutlass.html#abc5c00b4986db5a114e774cee9999717',1,'cutlass::make_zero(Vector< Scalar_, kLanes_ > &vec)']]], + ['max',['max',['../namespacecutlass_1_1platform.html#af6a9a165e53d7e85ae121d5789aa03e0',1,'cutlass::platform']]], + ['min',['min',['../namespacecutlass_1_1platform.html#a57c071d2a7305dd4ec60542e66b0c81c',1,'cutlass::platform']]], + ['multiply',['multiply',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a522301fbe3e276cb5ef9fbe75bb2ab50',1,'cutlass::gemm::FragmentMultiplyAdd::multiply()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#ae62d61ec068ac958753d0a2f5a99d8e2',1,'cutlass::gemm::FragmentMultiplyAdd< half >::multiply()']]], + ['multiply_5fadd',['multiply_add',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a34bbf209967fef6181d3d46dd27fa0c0',1,'cutlass::gemm::FragmentMultiplyAdd::multiply_add()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a89c8b663af69f13c2a02cb464b5172a5',1,'cutlass::gemm::FragmentMultiplyAdd< half 
>::multiply_add()'],['../structcutlass_1_1gemm_1_1Gemm.html#a2e844037d2527b842de3590cb783a49f',1,'cutlass::gemm::Gemm::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a66486d38349fa20eb065ae9542eb43aa',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#ad22dd143c304c22c2630aedbfd3459af',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a5dcf66c8126ec8adf8e66d4bf5b2f347',1,'cutlass::gemm::ThreadMultiplyAdd::multiply_add()']]] +]; diff --git a/docs/generated-html/search/functions_c.html b/docs/generated-html/search/functions_c.html new file mode 100644 index 00000000..3a33d874 --- /dev/null +++ b/docs/generated-html/search/functions_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_c.js b/docs/generated-html/search/functions_c.js new file mode 100644 index 00000000..14ca492a --- /dev/null +++ b/docs/generated-html/search/functions_c.js @@ -0,0 +1,31 @@ +var searchData= +[ + ['offset',['offset',['../classcutlass_1_1TensorRef.html#a02ee5d16ed4ce4705a99bb16b2ae1ae8',1,'cutlass::TensorRef::offset()'],['../classcutlass_1_1TensorView.html#a064f3630e69798e7915f910c4ee99ab7',1,'cutlass::TensorView::offset()']]], + ['operator_20_26_3d',['operator &=',['../structcutlass_1_1PredicateVector.html#a3dd9aeba8f3cbe7a8198d68d91a0bbb9',1,'cutlass::PredicateVector']]], + ['operator_20b_2a',['operator B*',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html#a8d100273203db9018dffbbe84e0b6c76',1,'cutlass::platform::is_base_of_helper::dummy']]], + ['operator_20bool',['operator bool',['../classcutlass_1_1platform_1_1unique__ptr.html#a5791650488ae864f10ad04bec4a31005',1,'cutlass::platform::unique_ptr']]], + ['operator_20d_2a',['operator D*',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html#a8aadc500baf1492b1a4d05cc8b35fc13',1,'cutlass::platform::is_base_of_helper::dummy']]], + ['operator_20value_5ftype',['operator value_type',['../structcutlass_1_1platform_1_1integral__constant.html#a55d25116387f1c6d978462b1d245d675',1,'cutlass::platform::integral_constant']]], + ['operator_21_3d',['operator!=',['../structcutlass_1_1Coord.html#a7fb46873e8f3cf38212703d35bd36995',1,'cutlass::Coord::operator!=()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a3d06715a77740034697686a7977cb685',1,'cutlass::PredicateVector::ConstIterator::operator!=()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a08cb4d1395b88a4451fbb1a27e010887',1,'cutlass::PredicateVector::Iterator::operator!=()'],['../namespacecutlass_1_1platform.html#a248f49adf09654d2cd04bd2760ab2566',1,'cutlass::platform::operator!=()']]], + 
['operator_28_29',['operator()',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html#ab8adb983c0573a0015469f40a75287be',1,'cutlass::gemm::GemmGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html#abaf0d4459a64b3e9533758b59600bd52',1,'cutlass::gemm::GemmGlobalTileCdTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html#a1e357fe5bc1daef333e6be776a21a2ca',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html#a4e35f0b2ca63a6b981230b73f843f726',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html#a51a325b435b9a53effaa003b3670e410',1,'cutlass::gemm::GemmSharedLoadTileATraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html#a5b4a635a521364357386259b0f84c0ba',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html#a4f9cca16303ac9ae29a0eaa11dcc23b6',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html#ace1b936cab289c6884e673312283d422',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html#a9fc1ca09733113f80fe5fe45db3d9b81',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html#a1228edf6cc0f81af520dc77c8792b94c',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset
.html#ad7537f8b30ee6913cf4afa1d3c054e68',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::ThreadOffset::operator()()'],['../structcutlass_1_1TiledThreadOffset.html#a7290b6ca9ef0bede634f69bd05450fa2',1,'cutlass::TiledThreadOffset::operator()()'],['../structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html#a0e4edffb19218ccbf77995f6d20df000',1,'cutlass::TileTraitsWarpRake::ThreadOffset::operator()()'],['../structcutlass_1_1platform_1_1plus.html#a3bf1e5147df4287bf58ad8f11ea0d98c',1,'cutlass::platform::plus::operator()()'],['../structcutlass_1_1platform_1_1less.html#adfb49ee70a700a8483c70b4b353f6bc5',1,'cutlass::platform::less::operator()()'],['../structcutlass_1_1platform_1_1greater.html#a8d56cf343dd33acebe19d0b51abe3978',1,'cutlass::platform::greater::operator()()'],['../structcutlass_1_1platform_1_1integral__constant.html#a5271a533526a535ae8b783c736252f18',1,'cutlass::platform::integral_constant::operator()()'],['../structcutlass_1_1platform_1_1default__delete.html#a59e6e3cc95685ac34fa6f9cf301b3a15',1,'cutlass::platform::default_delete::operator()()'],['../structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html#a16c5595a5aec7d7ee34e38bef4a66c87',1,'cutlass::platform::default_delete< T[]>::operator()()']]], + ['operator_2a',['operator*',['../structcutlass_1_1Coord.html#a8e4f7df55a75d040cf50cf9984c04c8a',1,'cutlass::Coord::operator*()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#abbc2bceb6cf8d7f168b8a00eb48c0946',1,'cutlass::PredicateVector::ConstIterator::operator*()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a049b568e0f5de011ee76ce79bcedbab4',1,'cutlass::PredicateVector::Iterator::operator*()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a78016158f99dd87e822a2a2cbd4cec78',1,'cutlass::PredicateVector::TrivialIterator::operator*()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a45a3cb6d8641a6130991d56e84cbb38b',1,'cutlass::platform::unique_ptr::operator*()']]], + 
['operator_2a_3d',['operator*=',['../structcutlass_1_1Coord.html#a282b6cc9ac8b2f72720c252791155aad',1,'cutlass::Coord']]], + ['operator_2b',['operator+',['../structcutlass_1_1Coord.html#a3dfc4ce4191097b6c3268696f2a45ef5',1,'cutlass::Coord::operator+()'],['../classcutlass_1_1TensorRef.html#aa7b80d225c01c9dc12aafc515cf15842',1,'cutlass::TensorRef::operator+()']]], + ['operator_2b_2b',['operator++',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a10ee4bb2f206432aa5ee1a83cb046b70',1,'cutlass::PredicateVector::ConstIterator::operator++()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a977a99af3166a58d5bc5a613a1abe7d5',1,'cutlass::PredicateVector::ConstIterator::operator++(int)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a7dddc0a6b5c958156beef29bedfd1bd3',1,'cutlass::PredicateVector::Iterator::operator++()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a6c7333ad14d545cafc707e78752bf1e3',1,'cutlass::PredicateVector::Iterator::operator++(int)'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#ad24e9b451064e99fb19955f772c30e6a',1,'cutlass::PredicateVector::TrivialIterator::operator++()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#aa35b9165920b83b9a5a888df83925051',1,'cutlass::PredicateVector::TrivialIterator::operator++(int)']]], + ['operator_2b_3d',['operator+=',['../structcutlass_1_1Coord.html#aeb209486943fa9d42911325b16e49e09',1,'cutlass::Coord']]], + ['operator_2d',['operator-',['../structcutlass_1_1Coord.html#acc510511ffb52bed7f6a52f14b99750d',1,'cutlass::Coord::operator-()'],['../classcutlass_1_1TensorRef.html#a3843ccfd1d097f25eff45dc159709938',1,'cutlass::TensorRef::operator-()']]], + 
['operator_2d_2d',['operator--',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a2763012a9284e97650b14e20c5668286',1,'cutlass::PredicateVector::ConstIterator::operator--()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a2910a714d34a688b8ea560ea2933436b',1,'cutlass::PredicateVector::ConstIterator::operator--(int)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a69fb5b24eeb43331b7401768e8584e61',1,'cutlass::PredicateVector::Iterator::operator--()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#aad709a11f43b84c88e3ce3a0394f8e8a',1,'cutlass::PredicateVector::Iterator::operator--(int)']]], + ['operator_2d_3d',['operator-=',['../structcutlass_1_1Coord.html#ac1795ec2a5890d8a39840567a4bea88e',1,'cutlass::Coord']]], + ['operator_2d_3e',['operator->',['../classcutlass_1_1platform_1_1unique__ptr.html#afa52edcaef23461ce1f9c1dac349c24b',1,'cutlass::platform::unique_ptr']]], + ['operator_2f',['operator/',['../structcutlass_1_1Coord.html#a87f485be079fa68bcf576da4d56f0ece',1,'cutlass::Coord']]], + ['operator_2f_3d',['operator/=',['../structcutlass_1_1Coord.html#abe91e59962ef0d73aec9c14824f64ecc',1,'cutlass::Coord']]], + ['operator_3c',['operator<',['../namespacecutlass_1_1platform.html#a412dbdbc678ecd12b55fcad4ef4155bd',1,'cutlass::platform']]], + ['operator_3c_3c',['operator<<',['../core__io_8h.html#a4a0d84a2a19a11549b87a2328d58690d',1,'core_io.h']]], + ['operator_3c_3d',['operator<=',['../namespacecutlass_1_1platform.html#a41d573133357bd555f78d33afc1152d3',1,'cutlass::platform']]], + ['operator_3d',['operator=',['../classcutlass_1_1TensorView.html#aa9e9e19f35ce3111f64b763ca49b51ef',1,'cutlass::TensorView']]], + 
['operator_3d_3d',['operator==',['../structcutlass_1_1Coord.html#acfa94aabd0c9a71ee994ca479d5f515f',1,'cutlass::Coord::operator==()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#aa2d03d88ac23051803d010f78157c357',1,'cutlass::PredicateVector::ConstIterator::operator==()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a5c5266fcef67c7b263682c4bc4a5000e',1,'cutlass::PredicateVector::Iterator::operator==()'],['../namespacecutlass_1_1platform.html#ab9b8306ae9dc21fa646c49b68fa8e197',1,'cutlass::platform::operator==()']]], + ['operator_3e',['operator>',['../namespacecutlass_1_1platform.html#a9e8e698d40b8df881991fde9ba2a1b12',1,'cutlass::platform']]], + ['operator_3e_3d',['operator>=',['../namespacecutlass_1_1platform.html#ab0f21e67c0a4b5c6952042b502c6816f',1,'cutlass::platform']]], + ['operator_5b_5d',['operator[]',['../structcutlass_1_1Coord.html#ab7fc89de3ccd7096ab275fb5dd40104c',1,'cutlass::Coord::operator[](int dim)'],['../structcutlass_1_1Coord.html#a6eeab0a1686ee25389e1bd017c5f03ae',1,'cutlass::Coord::operator[](int dim) const'],['../structcutlass_1_1Fragment.html#a99fef5f3093b2df50905ab13819b67a0',1,'cutlass::Fragment::operator[](int i)'],['../structcutlass_1_1Fragment.html#a75f51bb6ca84615076aab42ac9d42592',1,'cutlass::Fragment::operator[](int i) const'],['../structcutlass_1_1FragmentIterator.html#a83bb6a3ed588e2d890bf986665d2b7bb',1,'cutlass::FragmentIterator::operator[](int i) const'],['../structcutlass_1_1FragmentIterator.html#a3bd2a9d8467f8db02ca3a01ae0c11ad7',1,'cutlass::FragmentIterator::operator[](int i)'],['../structcutlass_1_1FragmentConstIterator.html#af16f2aa14ff424b038a393b683c4783e',1,'cutlass::FragmentConstIterator::operator[]()'],['../structcutlass_1_1PredicateVector.html#a840985438ac8306ec680eb20edd4e5c5',1,'cutlass::PredicateVector::operator[]()'],['../classcutlass_1_1TensorRef.html#a6a2aa88ed77557c089a165da0df1e974',1,'cutlass::TensorRef::operator[](Coord< Rank > const &coord) 
const'],['../classcutlass_1_1TensorRef.html#a34e97ab2190b4681d1c1199186d66f1c',1,'cutlass::TensorRef::operator[](int idx) const'],['../classcutlass_1_1TensorView.html#a7fe7e44e15fd1ac58fb55edf72e8fb23',1,'cutlass::TensorView::operator[]()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a5c7a204af07a7d325b0a8303e199a50d',1,'cutlass::platform::unique_ptr::operator[]()'],['../unioncutlass_1_1Vector.html#a250860c921c94a6077344f9e11bf5b02',1,'cutlass::Vector::operator[](uint32_t i) const'],['../unioncutlass_1_1Vector.html#a44cc27bf8a7b789b4ae8538155a50156',1,'cutlass::Vector::operator[](uint32_t i)'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#ab0516cef8949f5998b5251cc6b6db683',1,'cutlass::Vector< half, kLanes_ >::operator[](uint32_t i) const'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a8ade80e040264fbd669d3f15c249884e',1,'cutlass::Vector< half, kLanes_ >::operator[](uint32_t i)']]], + ['operator_7c_3d',['operator|=',['../structcutlass_1_1PredicateVector.html#aab9de134132c62de1c062ca57582cdbc',1,'cutlass::PredicateVector']]] +]; diff --git a/docs/generated-html/search/functions_d.html b/docs/generated-html/search/functions_d.html new file mode 100644 index 00000000..31b75b88 --- /dev/null +++ b/docs/generated-html/search/functions_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_d.js b/docs/generated-html/search/functions_d.js new file mode 100644 index 00000000..8aefe3b0 --- /dev/null +++ b/docs/generated-html/search/functions_d.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['predicatetileadapter',['PredicateTileAdapter',['../structcutlass_1_1PredicateTileAdapter.html#a4c9eb6c6498ccf117427a3b35f7ce5ea',1,'cutlass::PredicateTileAdapter']]], + ['predicatevector',['PredicateVector',['../structcutlass_1_1PredicateVector.html#aec1201df19c0ed0516810a3f19353c21',1,'cutlass::PredicateVector']]], + ['project',['project',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html#ae91b2350374f1734a30cbed45e14b8e3',1,'cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html#a0f1579013f56fe16ebc147271f163c3c',1,'cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html#af2a323461334a6b55b95074a1973d250',1,'cutlass::gemm::ProjectOperand< GemmOperand::kC, true >::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html#ace04040ccb13af5f9a283ca80ffe93d1',1,'cutlass::gemm::ProjectOperand< GemmOperand::kD, true >::project()']]] +]; diff --git a/docs/generated-html/search/functions_e.html b/docs/generated-html/search/functions_e.html new file mode 100644 index 00000000..cddb9bb5 --- /dev/null +++ b/docs/generated-html/search/functions_e.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_e.js b/docs/generated-html/search/functions_e.js new file mode 100644 index 00000000..8bc9b111 --- /dev/null +++ b/docs/generated-html/search/functions_e.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['ref',['ref',['../classcutlass_1_1TensorView.html#a8650860460ea24944c803a671095be09',1,'cutlass::TensorView::ref()'],['../classcutlass_1_1TensorView.html#a5cbff89d3d8dc71d27a4d6c1d7abb58a',1,'cutlass::TensorView::ref() const']]], + ['release',['release',['../classcutlass_1_1platform_1_1unique__ptr.html#a7ac06ebe7bc66573d3225891e12d2279',1,'cutlass::platform::unique_ptr']]], + ['reset',['reset',['../classcutlass_1_1TensorRef.html#abefe392e81da2c09cb127f963ae90674',1,'cutlass::TensorRef::reset()'],['../classcutlass_1_1TensorView.html#a8b1785a1ea5d7aa7eba8e45297d539d3',1,'cutlass::TensorView::reset()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a6740f71511f5495d6038cf8878862331',1,'cutlass::platform::unique_ptr::reset()']]], + ['residue',['residue',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#aae1adef6312e069e59a83d38c03116f9',1,'cutlass::gemm::GlobalLoadStreamBase::residue()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#aab37ea6c47e34466371314ed3971dc7b',1,'cutlass::gemm::GemmGlobalIteratorAb::residue()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a405b93680bb6e356369863244d0b56aa',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::residue()']]], + ['round_5fnearest',['round_nearest',['../namespacecutlass.html#a17c8c408d672d26f1c70d2435f6ac83e',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/functions_f.html b/docs/generated-html/search/functions_f.html new file mode 100644 index 00000000..49672926 --- /dev/null +++ b/docs/generated-html/search/functions_f.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_f.js b/docs/generated-html/search/functions_f.js new file mode 100644 index 00000000..183adc88 --- /dev/null +++ b/docs/generated-html/search/functions_f.js @@ -0,0 +1,16 @@ +var searchData= +[ + ['set',['set',['../classcutlass_1_1PredicateVector_1_1Iterator.html#aadfd039b5622098c9e46706a27122575',1,'cutlass::PredicateVector::Iterator::set()'],['../structcutlass_1_1PredicateVector.html#a062fa8a8df725ef08ced2ffcca8336af',1,'cutlass::PredicateVector::set()'],['../structcutlass_1_1PredicateTileAdapter.html#aeda47efdda0387f9c3c7b31f836afca5',1,'cutlass::PredicateTileAdapter::set()']]], + ['shared_5fiterator_5fload',['shared_iterator_load',['../namespacecutlass.html#abcec976c59cab75ca55b338d125154a3',1,'cutlass::shared_iterator_load(InputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#aa9416026c6db08d92a34c2ac08fea8c3',1,'cutlass::shared_iterator_load(InputIterator &iterator, Fragment &fragment, int d)']]], + ['shared_5fiterator_5fstore',['shared_iterator_store',['../namespacecutlass.html#a705c6d75513e112d2731d1c40f4cf109',1,'cutlass']]], + ['shared_5fload_5ffence',['shared_load_fence',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9b5e42f222fec98ff479bc1650221b84',1,'cutlass::gemm::GemmEpilogue::shared_load_fence()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a475463c1e3af71598e22da8956900ebe',1,'cutlass::gemm::GemmTraits::shared_load_fence()']]], + ['shared_5fstore_5ffence',['shared_store_fence',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac1b2a16b4ccf3e9617faf4d8a2c43691',1,'cutlass::gemm::GemmEpilogue::shared_store_fence()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#ac3c840a3d90c0da43301761af83c2c9f',1,'cutlass::gemm::GemmTraits::shared_store_fence()']]], + 
['sharedloadstream',['SharedLoadStream',['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a6e097738679436d580e8dc6ac70efaad',1,'cutlass::gemm::SharedLoadStream::SharedLoadStream()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a93e9bcdca4ceb68754fb1f73e2b25d25',1,'cutlass::gemm::SharedLoadStream::SharedLoadStream(Params const &params, SharedStorage &shared_storage)'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a49315aea1c54d84ff19b0ac215128b95',1,'cutlass::gemm::GemmTraits::SharedLoadStream::SharedLoadStream()']]], + ['size',['size',['../classcutlass_1_1TensorView.html#a541a7c22e7109d4059044f146fe69027',1,'cutlass::TensorView::size() const'],['../classcutlass_1_1TensorView.html#a6218d8555679966eab784a6bb1fa4ed1',1,'cutlass::TensorView::size(int dim) const']]], + ['store',['store',['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html#a118c78aa6b0ae0f0c78889689b6878c8',1,'cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html#a45319520b7d341c66bd54d3e8fec48f8',1,'cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store()'],['../structcutlass_1_1Store.html#a1117fa7b7bdeeb3a7f2d647a1d340aaf',1,'cutlass::Store::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a00f6bb93d318bf4cff35c9dabc630167',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a027980b8456243974b0c442866a66e3a',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 
>::store()'],['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#ab70d04589637f285f861902f649f834e',1,'cutlass::Store< double, 2, Memory_, true, 16 >::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#aa130564bb2eba7b07e1f183c98f1d9e2',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >::store()'],['../structcutlass_1_1TileStoreIterator.html#a53820de506cecb1f5fb07b3385d8272a',1,'cutlass::TileStoreIterator::store(Fragment &fragment, PredicateIterator pred_it) const'],['../structcutlass_1_1TileStoreIterator.html#a60258b7c1a1708f97e28f8f6c292bfe4',1,'cutlass::TileStoreIterator::store(Fragment &fragment) const']]], + ['store_5fpost_5fincrement',['store_post_increment',['../structcutlass_1_1TileStoreIterator.html#a57aa2c36eb6ad9d2500c1f5396b3a526',1,'cutlass::TileStoreIterator::store_post_increment(Fragment &fragment, PredicateIterator pred_it)'],['../structcutlass_1_1TileStoreIterator.html#ae63949f58c1b32959bbfa5b64d521f0f',1,'cutlass::TileStoreIterator::store_post_increment(Fragment &fragment)']]], + ['stride',['stride',['../classcutlass_1_1TensorRef.html#a89380141d25528c4c7ba6c365b96a878',1,'cutlass::TensorRef::stride() const'],['../classcutlass_1_1TensorRef.html#af47f192552544272774a29d7a0829a31',1,'cutlass::TensorRef::stride(int dim) const'],['../classcutlass_1_1TensorView.html#a3ac125a25199fd91f73d2cfe9fc3d09b',1,'cutlass::TensorView::stride() const'],['../classcutlass_1_1TensorView.html#a522630bb0df977282a9bff17e6fee843',1,'cutlass::TensorView::stride(int dim) const']]], + ['subview',['subview',['../classcutlass_1_1TensorView.html#aee43c516397d7c06eb8012711d8d7c15',1,'cutlass::TensorView']]], + ['swap',['swap',['../classcutlass_1_1platform_1_1unique__ptr.html#a748d413c50bdbbe9e2f9986fbc423036',1,'cutlass::platform::unique_ptr::swap()'],['../namespacecutlass_1_1platform.html#a3e83320a39137d92042eb0bf93be9678',1,'cutlass::platform::swap()']]], + 
['swizzle',['swizzle',['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html#a0a366c072ee66bbcb390acd7b8bbe5f8',1,'cutlass::gemm::IdentityBlockSwizzle']]] +]; diff --git a/docs/generated-html/search/groups_0.html b/docs/generated-html/search/groups_0.html new file mode 100644 index 00000000..f4895cb4 --- /dev/null +++ b/docs/generated-html/search/groups_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/groups_0.js b/docs/generated-html/search/groups_0.js new file mode 100644 index 00000000..1b24cdfd --- /dev/null +++ b/docs/generated-html/search/groups_0.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['fragment_20concept',['Fragment Concept',['../group__fragment__concept.html',1,'']]], + ['fragment_20iterator_20concept',['Fragment Iterator Concept',['../group__fragment__iterator__concept.html',1,'']]] +]; diff --git a/docs/generated-html/search/groups_1.html b/docs/generated-html/search/groups_1.html new file mode 100644 index 00000000..31952659 --- /dev/null +++ b/docs/generated-html/search/groups_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/groups_1.js b/docs/generated-html/search/groups_1.js new file mode 100644 index 00000000..5ebe29b0 --- /dev/null +++ b/docs/generated-html/search/groups_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['layout_20concept',['Layout Concept',['../group__layout__concept.html',1,'']]] +]; diff --git a/docs/generated-html/search/groups_2.html b/docs/generated-html/search/groups_2.html new file mode 100644 index 00000000..58824467 --- /dev/null +++ b/docs/generated-html/search/groups_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/groups_2.js b/docs/generated-html/search/groups_2.js new file mode 100644 index 00000000..b7fc0e34 --- /dev/null +++ b/docs/generated-html/search/groups_2.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['predicate_20iterator_20concept',['Predicate Iterator Concept',['../group__predicate__iterator__concept.html',1,'']]], + ['predicate_20tile_20adapter_20concept',['Predicate Tile Adapter Concept',['../group__predicate__tile__adapter.html',1,'']]], + ['predicate_20vector_20concept',['Predicate Vector Concept',['../group__predicate__vector__concept.html',1,'']]] +]; diff --git a/docs/generated-html/search/groups_3.html b/docs/generated-html/search/groups_3.html new file mode 100644 index 00000000..bd23aa6e --- /dev/null +++ b/docs/generated-html/search/groups_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/groups_3.js b/docs/generated-html/search/groups_3.js new file mode 100644 index 00000000..e48e674f --- /dev/null +++ b/docs/generated-html/search/groups_3.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['tile_20load_20iterator_20concept',['Tile Load Iterator Concept',['../group__tile__load__iterator__concept.html',1,'']]], + ['tile_20store_20iterator_20concept',['Tile Store Iterator Concept',['../group__tile__store__iterator__concept.html',1,'']]], + ['tile_20traits_20concept',['Tile Traits Concept',['../group__tile__traits__concept.html',1,'']]] +]; diff --git a/docs/generated-html/search/mag_sel.png b/docs/generated-html/search/mag_sel.png new file mode 100644 index 00000000..81f6040a Binary files /dev/null and b/docs/generated-html/search/mag_sel.png differ diff --git a/docs/generated-html/search/namespaces_0.html b/docs/generated-html/search/namespaces_0.html new file mode 100644 index 00000000..f55ca63a --- /dev/null +++ b/docs/generated-html/search/namespaces_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/namespaces_0.js b/docs/generated-html/search/namespaces_0.js new file mode 100644 index 00000000..53898456 --- /dev/null +++ b/docs/generated-html/search/namespaces_0.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['cutlass',['cutlass',['../namespacecutlass.html',1,'']]], + ['gemm',['gemm',['../namespacecutlass_1_1gemm.html',1,'cutlass']]], + ['platform',['platform',['../namespacecutlass_1_1platform.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/namespaces_1.html b/docs/generated-html/search/namespaces_1.html new file mode 100644 index 00000000..37c816cc --- /dev/null +++ b/docs/generated-html/search/namespaces_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/namespaces_1.js b/docs/generated-html/search/namespaces_1.js new file mode 100644 index 00000000..5f157042 --- /dev/null +++ b/docs/generated-html/search/namespaces_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['nv_5fstd',['nv_std',['../namespacenv__std.html',1,'']]] +]; diff --git a/docs/generated-html/search/nomatches.html b/docs/generated-html/search/nomatches.html new file mode 100644 index 00000000..b1ded27e --- /dev/null +++ b/docs/generated-html/search/nomatches.html @@ -0,0 +1,12 @@ + + + + + + + +
    +
    No Matches
    +
    + + diff --git a/docs/generated-html/search/search.css b/docs/generated-html/search/search.css new file mode 100644 index 00000000..53d15fed --- /dev/null +++ b/docs/generated-html/search/search.css @@ -0,0 +1,271 @@ +/*---------------- Search Box */ + +#FSearchBox { + float: left; +} + +#MSearchBox { + white-space : nowrap; + float: none; + margin-top: 8px; + right: 0px; + width: 170px; + height: 24px; + z-index: 102; +} + +#MSearchBox .left +{ + display:block; + position:absolute; + left:10px; + width:20px; + height:19px; + background:url('search_l.png') no-repeat; + background-position:right; +} + +#MSearchSelect { + display:block; + position:absolute; + width:20px; + height:19px; +} + +.left #MSearchSelect { + left:4px; +} + +.right #MSearchSelect { + right:5px; +} + +#MSearchField { + display:block; + position:absolute; + height:19px; + background:url('search_m.png') repeat-x; + border:none; + width:115px; + margin-left:20px; + padding-left:4px; + color: #909090; + outline: none; + font: 9pt Arial, Verdana, sans-serif; + -webkit-border-radius: 0px; +} + +#FSearchBox #MSearchField { + margin-left:15px; +} + +#MSearchBox .right { + display:block; + position:absolute; + right:10px; + top:8px; + width:20px; + height:19px; + background:url('search_r.png') no-repeat; + background-position:left; +} + +#MSearchClose { + display: none; + position: absolute; + top: 4px; + background : none; + border: none; + margin: 0px 4px 0px 0px; + padding: 0px 0px; + outline: none; +} + +.left #MSearchClose { + left: 6px; +} + +.right #MSearchClose { + right: 2px; +} + +.MSearchBoxActive #MSearchField { + color: #000000; +} + +/*---------------- Search filter selection */ + +#MSearchSelectWindow { + display: none; + position: absolute; + left: 0; top: 0; + border: 1px solid #B7CE90; + background-color: #FBFCF9; + z-index: 10001; + padding-top: 4px; + padding-bottom: 4px; + -moz-border-radius: 4px; + -webkit-border-top-left-radius: 4px; + -webkit-border-top-right-radius: 4px; + 
-webkit-border-bottom-left-radius: 4px; + -webkit-border-bottom-right-radius: 4px; + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); +} + +.SelectItem { + font: 8pt Arial, Verdana, sans-serif; + padding-left: 2px; + padding-right: 12px; + border: 0px; +} + +span.SelectionMark { + margin-right: 4px; + font-family: monospace; + outline-style: none; + text-decoration: none; +} + +a.SelectItem { + display: block; + outline-style: none; + color: #000000; + text-decoration: none; + padding-left: 6px; + padding-right: 12px; +} + +a.SelectItem:focus, +a.SelectItem:active { + color: #000000; + outline-style: none; + text-decoration: none; +} + +a.SelectItem:hover { + color: #FFFFFF; + background-color: #6F8C3D; + outline-style: none; + text-decoration: none; + cursor: pointer; + display: block; +} + +/*---------------- Search results window */ + +iframe#MSearchResults { + width: 60ex; + height: 15em; +} + +#MSearchResultsWindow { + display: none; + position: absolute; + left: 0; top: 0; + border: 1px solid #000; + background-color: #F4F7EE; + z-index:10000; +} + +/* ----------------------------------- */ + + +#SRIndex { + clear:both; + padding-bottom: 15px; +} + +.SREntry { + font-size: 10pt; + padding-left: 1ex; +} + +.SRPage .SREntry { + font-size: 8pt; + padding: 1px 5px; +} + +body.SRPage { + margin: 5px 2px; +} + +.SRChildren { + padding-left: 3ex; padding-bottom: .5em +} + +.SRPage .SRChildren { + display: none; +} + +.SRSymbol { + font-weight: bold; + color: #789742; + font-family: Arial, Verdana, sans-serif; + text-decoration: none; + outline: none; +} + +a.SRScope { + display: block; + color: #789742; + font-family: Arial, Verdana, sans-serif; + text-decoration: none; + outline: none; +} + +a.SRSymbol:focus, a.SRSymbol:active, +a.SRScope:focus, a.SRScope:active { + text-decoration: underline; +} + +span.SRScope { + padding-left: 4px; +} + +.SRPage .SRStatus { + padding: 2px 5px; + font-size: 8pt; + font-style: italic; +} + +.SRResult { + display: none; +} + 
+DIV.searchresults { + margin-left: 10px; + margin-right: 10px; +} + +/*---------------- External search page results */ + +.searchresult { + background-color: #F5F8F0; +} + +.pages b { + color: white; + padding: 5px 5px 3px 5px; + background-image: url("../tab_a.png"); + background-repeat: repeat-x; + text-shadow: 0 1px 1px #000000; +} + +.pages { + line-height: 17px; + margin-left: 4px; + text-decoration: none; +} + +.hl { + font-weight: bold; +} + +#searchresults { + margin-bottom: 20px; +} + +.searchpages { + margin-top: 10px; +} + diff --git a/docs/generated-html/search/search.js b/docs/generated-html/search/search.js new file mode 100644 index 00000000..a554ab9c --- /dev/null +++ b/docs/generated-html/search/search.js @@ -0,0 +1,814 @@ +/* + @licstart The following is the entire license notice for the + JavaScript code in this file. + + Copyright (C) 1997-2017 by Dimitri van Heesch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + @licend The above is the entire license notice + for the JavaScript code in this file + */ +function convertToId(search) +{ + var result = ''; + for (i=0;i do a search + { + this.Search(); + } + } + + this.OnSearchSelectKey = function(evt) + { + var e = (evt) ? 
evt : window.event; // for IE + if (e.keyCode==40 && this.searchIndex0) // Up + { + this.searchIndex--; + this.OnSelectItem(this.searchIndex); + } + else if (e.keyCode==13 || e.keyCode==27) + { + this.OnSelectItem(this.searchIndex); + this.CloseSelectionWindow(); + this.DOMSearchField().focus(); + } + return false; + } + + // --------- Actions + + // Closes the results window. + this.CloseResultsWindow = function() + { + this.DOMPopupSearchResultsWindow().style.display = 'none'; + this.DOMSearchClose().style.display = 'none'; + this.Activate(false); + } + + this.CloseSelectionWindow = function() + { + this.DOMSearchSelectWindow().style.display = 'none'; + } + + // Performs a search. + this.Search = function() + { + this.keyTimeout = 0; + + // strip leading whitespace + var searchValue = this.DOMSearchField().value.replace(/^ +/, ""); + + var code = searchValue.toLowerCase().charCodeAt(0); + var idxChar = searchValue.substr(0, 1).toLowerCase(); + if ( 0xD800 <= code && code <= 0xDBFF && searchValue > 1) // surrogate pair + { + idxChar = searchValue.substr(0, 2); + } + + var resultsPage; + var resultsPageWithSearch; + var hasResultsPage; + + var idx = indexSectionsWithContent[this.searchIndex].indexOf(idxChar); + if (idx!=-1) + { + var hexCode=idx.toString(16); + resultsPage = this.resultsPath + '/' + indexSectionNames[this.searchIndex] + '_' + hexCode + '.html'; + resultsPageWithSearch = resultsPage+'?'+escape(searchValue); + hasResultsPage = true; + } + else // nothing available for this search term + { + resultsPage = this.resultsPath + '/nomatches.html'; + resultsPageWithSearch = resultsPage; + hasResultsPage = false; + } + + window.frames.MSearchResults.location = resultsPageWithSearch; + var domPopupSearchResultsWindow = this.DOMPopupSearchResultsWindow(); + + if (domPopupSearchResultsWindow.style.display!='block') + { + var domSearchBox = this.DOMSearchBox(); + this.DOMSearchClose().style.display = 'inline'; + if (this.insideFrame) + { + var 
domPopupSearchResults = this.DOMPopupSearchResults(); + domPopupSearchResultsWindow.style.position = 'relative'; + domPopupSearchResultsWindow.style.display = 'block'; + var width = document.body.clientWidth - 8; // the -8 is for IE :-( + domPopupSearchResultsWindow.style.width = width + 'px'; + domPopupSearchResults.style.width = width + 'px'; + } + else + { + var domPopupSearchResults = this.DOMPopupSearchResults(); + var left = getXPos(domSearchBox) + 150; // domSearchBox.offsetWidth; + var top = getYPos(domSearchBox) + 20; // domSearchBox.offsetHeight + 1; + domPopupSearchResultsWindow.style.display = 'block'; + left -= domPopupSearchResults.offsetWidth; + domPopupSearchResultsWindow.style.top = top + 'px'; + domPopupSearchResultsWindow.style.left = left + 'px'; + } + } + + this.lastSearchValue = searchValue; + this.lastResultsPage = resultsPage; + } + + // -------- Activation Functions + + // Activates or deactivates the search panel, resetting things to + // their default values if necessary. + this.Activate = function(isActive) + { + if (isActive || // open it + this.DOMPopupSearchResultsWindow().style.display == 'block' + ) + { + this.DOMSearchBox().className = 'MSearchBoxActive'; + + var searchField = this.DOMSearchField(); + + if (searchField.value == this.searchLabel) // clear "Search" term upon entry + { + searchField.value = ''; + this.searchActive = true; + } + } + else if (!isActive) // directly remove the panel + { + this.DOMSearchBox().className = 'MSearchBoxInactive'; + this.DOMSearchField().value = this.searchLabel; + this.searchActive = false; + this.lastSearchValue = '' + this.lastResultsPage = ''; + } + } +} + +// ----------------------------------------------------------------------- + +// The class that handles everything on the search results page. +function SearchResults(name) +{ + // The number of matches from the last run of . 
+ this.lastMatchCount = 0; + this.lastKey = 0; + this.repeatOn = false; + + // Toggles the visibility of the passed element ID. + this.FindChildElement = function(id) + { + var parentElement = document.getElementById(id); + var element = parentElement.firstChild; + + while (element && element!=parentElement) + { + if (element.nodeName == 'DIV' && element.className == 'SRChildren') + { + return element; + } + + if (element.nodeName == 'DIV' && element.hasChildNodes()) + { + element = element.firstChild; + } + else if (element.nextSibling) + { + element = element.nextSibling; + } + else + { + do + { + element = element.parentNode; + } + while (element && element!=parentElement && !element.nextSibling); + + if (element && element!=parentElement) + { + element = element.nextSibling; + } + } + } + } + + this.Toggle = function(id) + { + var element = this.FindChildElement(id); + if (element) + { + if (element.style.display == 'block') + { + element.style.display = 'none'; + } + else + { + element.style.display = 'block'; + } + } + } + + // Searches for the passed string. If there is no parameter, + // it takes it from the URL query. + // + // Always returns true, since other documents may try to call it + // and that may or may not be possible. + this.Search = function(search) + { + if (!search) // get search word from URL + { + search = window.location.search; + search = search.substring(1); // Remove the leading '?' 
+ search = unescape(search); + } + + search = search.replace(/^ +/, ""); // strip leading spaces + search = search.replace(/ +$/, ""); // strip trailing spaces + search = search.toLowerCase(); + search = convertToId(search); + + var resultRows = document.getElementsByTagName("div"); + var matches = 0; + + var i = 0; + while (i < resultRows.length) + { + var row = resultRows.item(i); + if (row.className == "SRResult") + { + var rowMatchName = row.id.toLowerCase(); + rowMatchName = rowMatchName.replace(/^sr\d*_/, ''); // strip 'sr123_' + + if (search.length<=rowMatchName.length && + rowMatchName.substr(0, search.length)==search) + { + row.style.display = 'block'; + matches++; + } + else + { + row.style.display = 'none'; + } + } + i++; + } + document.getElementById("Searching").style.display='none'; + if (matches == 0) // no results + { + document.getElementById("NoMatches").style.display='block'; + } + else // at least one result + { + document.getElementById("NoMatches").style.display='none'; + } + this.lastMatchCount = matches; + return true; + } + + // return the first item with index index or higher that is visible + this.NavNext = function(index) + { + var focusItem; + while (1) + { + var focusName = 'Item'+index; + focusItem = document.getElementById(focusName); + if (focusItem && focusItem.parentNode.parentNode.style.display=='block') + { + break; + } + else if (!focusItem) // last element + { + break; + } + focusItem=null; + index++; + } + return focusItem; + } + + this.NavPrev = function(index) + { + var focusItem; + while (1) + { + var focusName = 'Item'+index; + focusItem = document.getElementById(focusName); + if (focusItem && focusItem.parentNode.parentNode.style.display=='block') + { + break; + } + else if (!focusItem) // last element + { + break; + } + focusItem=null; + index--; + } + return focusItem; + } + + this.ProcessKeys = function(e) + { + if (e.type == "keydown") + { + this.repeatOn = false; + this.lastKey = e.keyCode; + } + else if (e.type == 
"keypress") + { + if (!this.repeatOn) + { + if (this.lastKey) this.repeatOn = true; + return false; // ignore first keypress after keydown + } + } + else if (e.type == "keyup") + { + this.lastKey = 0; + this.repeatOn = false; + } + return this.lastKey!=0; + } + + this.Nav = function(evt,itemIndex) + { + var e = (evt) ? evt : window.event; // for IE + if (e.keyCode==13) return true; + if (!this.ProcessKeys(e)) return false; + + if (this.lastKey==38) // Up + { + var newIndex = itemIndex-1; + var focusItem = this.NavPrev(newIndex); + if (focusItem) + { + var child = this.FindChildElement(focusItem.parentNode.parentNode.id); + if (child && child.style.display == 'block') // children visible + { + var n=0; + var tmpElem; + while (1) // search for last child + { + tmpElem = document.getElementById('Item'+newIndex+'_c'+n); + if (tmpElem) + { + focusItem = tmpElem; + } + else // found it! + { + break; + } + n++; + } + } + } + if (focusItem) + { + focusItem.focus(); + } + else // return focus to search field + { + parent.document.getElementById("MSearchField").focus(); + } + } + else if (this.lastKey==40) // Down + { + var newIndex = itemIndex+1; + var focusItem; + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem && elem.style.display == 'block') // children visible + { + focusItem = document.getElementById('Item'+itemIndex+'_c0'); + } + if (!focusItem) focusItem = this.NavNext(newIndex); + if (focusItem) focusItem.focus(); + } + else if (this.lastKey==39) // Right + { + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem) elem.style.display = 'block'; + } + else if (this.lastKey==37) // Left + { + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem) elem.style.display = 'none'; + } + else if (this.lastKey==27) // Escape + { + 
parent.searchBox.CloseResultsWindow(); + parent.document.getElementById("MSearchField").focus(); + } + else if (this.lastKey==13) // Enter + { + return true; + } + return false; + } + + this.NavChild = function(evt,itemIndex,childIndex) + { + var e = (evt) ? evt : window.event; // for IE + if (e.keyCode==13) return true; + if (!this.ProcessKeys(e)) return false; + + if (this.lastKey==38) // Up + { + if (childIndex>0) + { + var newIndex = childIndex-1; + document.getElementById('Item'+itemIndex+'_c'+newIndex).focus(); + } + else // already at first child, jump to parent + { + document.getElementById('Item'+itemIndex).focus(); + } + } + else if (this.lastKey==40) // Down + { + var newIndex = childIndex+1; + var elem = document.getElementById('Item'+itemIndex+'_c'+newIndex); + if (!elem) // last child, jump to parent next parent + { + elem = this.NavNext(itemIndex+1); + } + if (elem) + { + elem.focus(); + } + } + else if (this.lastKey==27) // Escape + { + parent.searchBox.CloseResultsWindow(); + parent.document.getElementById("MSearchField").focus(); + } + else if (this.lastKey==13) // Enter + { + return true; + } + return false; + } +} + +function setKeyActions(elem,action) +{ + elem.setAttribute('onkeydown',action); + elem.setAttribute('onkeypress',action); + elem.setAttribute('onkeyup',action); +} + +function setClassAttr(elem,attr) +{ + elem.setAttribute('class',attr); + elem.setAttribute('className',attr); +} + +function createResults() +{ + var results = document.getElementById("SRResults"); + for (var e=0; e + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_0.js b/docs/generated-html/search/typedefs_0.js new file mode 100644 index 00000000..fc2d59d1 --- /dev/null +++ b/docs/generated-html/search/typedefs_0.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['accesstype',['AccessType',['../structcutlass_1_1FragmentIterator.html#a012c5af3a8a40843c576c55ecbc663e7',1,'cutlass::FragmentIterator::AccessType()'],['../structcutlass_1_1FragmentConstIterator.html#addf5c21444f129211eefe7cdca6dfa1b',1,'cutlass::FragmentConstIterator::AccessType()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html#a0b656c41b9fff6402f33e95204ce8860',1,'cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html#a7eccab04c8d3968e74486d0525a3fa02',1,'cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html#abca5165caae7304f33fcad267c16b002',1,'cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html#a87d46956aa317f06f2ba9a535fdfc5da',1,'cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride 
>::AccessType()'],['../structcutlass_1_1Load.html#ad0bf2da0c240f3a2a3f4c92162d347ae',1,'cutlass::Load::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a5d7ed0abaeea99ec3399f8eea930f761',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a2b9faed8d92f55a46e313d79d214316d',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::AccessType()'],['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#a8611550c045d6def964d9dafb2be80c6',1,'cutlass::Load< double, 2, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#a942970f88e13c88f496a9da67ed47a6f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Store.html#a8d2f927b2b61987dcea40e84f4575942',1,'cutlass::Store::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a89f329ba11f96ee3ce4428cbc792ac3d',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#ac0af6ae18137156abe24d6479232b955',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >::AccessType()'],['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#ad073f5e8252ad24b086f14bd2a109cf9',1,'cutlass::Store< double, 2, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#aeb70e4859e2795b6af63ad5e203b4da9',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 
>::AccessType()'],['../structcutlass_1_1TileIteratorBase.html#abb3dde23971ad35a477b75ee99381b53',1,'cutlass::TileIteratorBase::AccessType()'],['../structcutlass_1_1TileLoadIterator.html#a4af8eeabe7c1ec0362782687a84466e0',1,'cutlass::TileLoadIterator::AccessType()'],['../structcutlass_1_1TileStoreIterator.html#a0e79ed59263ebc3478c43f2f9a50cb5a',1,'cutlass::TileStoreIterator::AccessType()']]], + ['accumulators',['Accumulators',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#afe6bebd94e3379c94054d04c5196edce',1,'cutlass::gemm::GemmEpilogue::Accumulators()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#af7ff579ccb4269bfa5e9ae297260f7a2',1,'cutlass::gemm::GemmEpilogueTraits::Accumulators()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a2fadb0ad2e28109ccfa9195e817a4d54',1,'cutlass::gemm::GemmConfig::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a505306c2af2059f6e84ba32d701d1602',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a4712650b46b6183ea60d79ef18f55b86',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a760a5262f419b789540e7bbb2fda4b9d',1,'cutlass::gemm::ThreadMultiplyAdd::Accumulators()']]], + ['accumulatorsperthread',['AccumulatorsPerThread',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a98d0f84730551eaabfe7404b36478b50',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half 
>::AccumulatorsPerThread()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a47807c9c9fb43e7f7b5f409a49986c30',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerThread()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a002b1944b25cc8fe0862f40a8c8555c5',1,'cutlass::gemm::ThreadMultiplyAdd::AccumulatorsPerThread()']]], + ['accumulatorsperwarp',['AccumulatorsPerWarp',['../structcutlass_1_1gemm_1_1GemmConfig.html#a51d583dfcd645ad0ecfc23b87b3c5108',1,'cutlass::gemm::GemmConfig::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#af0c856abdd9f7f26f671493cc629bf0a',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a327ce1b7b6478c27c80baf5d9e26bdbc',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#aa83190df3c1639b6dd632cd4b9278d77',1,'cutlass::gemm::ThreadMultiplyAdd::AccumulatorsPerWarp()']]] +]; diff --git a/docs/generated-html/search/typedefs_1.html b/docs/generated-html/search/typedefs_1.html new file mode 100644 index 00000000..7af807db --- /dev/null +++ b/docs/generated-html/search/typedefs_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_1.js b/docs/generated-html/search/typedefs_1.js new file mode 100644 index 00000000..e1e869f3 --- /dev/null +++ b/docs/generated-html/search/typedefs_1.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['base',['Base',['../structcutlass_1_1gemm_1_1GlobalLoadStream.html#a507f825824e624d80a34ea9395934160',1,'cutlass::gemm::GlobalLoadStream::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a581b7cdeef3e620f246923fa07f9db5a',1,'cutlass::gemm::GemmGlobalTileCdTraits::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ae13e0d30a941e16875f196b4844b03ed',1,'cutlass::gemm::GemmGlobalIteratorAb::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a8f8fbb65070589769468c6b1ac6ba7a5',1,'cutlass::gemm::GemmGlobalIteratorCd::Base()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#ac0c372c24c4c5340153b11edab874741',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Base()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a7ec19bf90207a7f598f2ec5166649495',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#aca63ec1099444c555299dc144282dded',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a4b23ba8c14e26672a516aa43063250c2',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue.html#a07f9a934f04610db41aa1aac2f4cdf04',1,'cutlass::gemm::IgemmEpilogue::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html#a98b415dbe6f7b6cb0c41a4e6b3ad5abf',1,'cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true 
>::Base()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#ab19f72d239f639f261fbb63f72f10acf',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Base()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#affd04d88a0bbef13c54f10000a5dc15d',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aef7047c6a0d0c3db0bfb6bec08520aad',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html#a194aa2762885c3d556a84ff410200b86',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::Base()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a48a8eda430139e6a131654a54bbf0f3b',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Base()'],['../classcutlass_1_1TensorView.html#a27f09c55f879410cceb75eb25fe542d4',1,'cutlass::TensorView::Base()'],['../structcutlass_1_1TileLoadIterator.html#a1bc1bd4893c14b313ee71b71db2903f3',1,'cutlass::TileLoadIterator::Base()'],['../structcutlass_1_1TileStoreIterator.html#af4576dca736bab8ac73b308522cb4a67',1,'cutlass::TileStoreIterator::Base()']]], + ['baseparams',['BaseParams',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a09268125f1e323874f6c12b50185c517',1,'cutlass::gemm::GemmGlobalIteratorAb::BaseParams()'],['../structcutlass_1_1TileLoadIterator.html#a788bab4fa46dc26854348b751cf1cc76',1,'cutlass::TileLoadIterator::BaseParams()'],['../structcutlass_1_1TileStoreIterator.html#a5484b46ac2646edb7a185b51137f70c0',1,'cutlass::TileStoreIterator::BaseParams()']]], + ['blockswizzle',['BlockSwizzle',['../structcutlass_1_1gemm_1_1GemmTraits.html#a50672b5fa67d858aeff8f254cf28e941',1,'cutlass::gemm::GemmTraits']]] +]; diff --git a/docs/generated-html/search/typedefs_10.html b/docs/generated-html/search/typedefs_10.html new 
file mode 100644 index 00000000..d8e5942a --- /dev/null +++ b/docs/generated-html/search/typedefs_10.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_10.js b/docs/generated-html/search/typedefs_10.js new file mode 100644 index 00000000..e488958f --- /dev/null +++ b/docs/generated-html/search/typedefs_10.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['warps',['Warps',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#aaff4a5e0f9e4256f184a22cad0ce8cf4',1,'cutlass::gemm::GemmSharedLoadTileATraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a7ad7a4e33ed43926e165e66162eb620b',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#af4597927405d8bb1ad2c464fad064703',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a4764f70691cb3fee91ce47653363aa4f',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#abb6ba58a2f2d80db0b2c9c1d88454efd',1,'cutlass::gemm::GemmConfig::Warps()']]] +]; diff --git a/docs/generated-html/search/typedefs_11.html b/docs/generated-html/search/typedefs_11.html new file mode 100644 index 00000000..3f37b890 --- /dev/null +++ b/docs/generated-html/search/typedefs_11.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_11.js b/docs/generated-html/search/typedefs_11.js new file mode 100644 index 00000000..d8526488 --- /dev/null +++ b/docs/generated-html/search/typedefs_11.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['yes',['yes',['../structcutlass_1_1platform_1_1is__base__of__helper.html#ac1cf3f804e7686213fd42c678cc6d669',1,'cutlass::platform::is_base_of_helper']]] +]; diff --git a/docs/generated-html/search/typedefs_2.html b/docs/generated-html/search/typedefs_2.html new file mode 100644 index 00000000..745d076c --- /dev/null +++ b/docs/generated-html/search/typedefs_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_2.js b/docs/generated-html/search/typedefs_2.js new file mode 100644 index 00000000..d216b109 --- /dev/null +++ b/docs/generated-html/search/typedefs_2.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['clearaccumulators',['ClearAccumulators',['../structcutlass_1_1gemm_1_1GemmTraits.html#ae1cf7988c9cff79a2c3252aaf91fc165',1,'cutlass::gemm::GemmTraits::ClearAccumulators()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#aba2366bec386c74df47dfd0426b07041',1,'cutlass::gemm::HgemmTraitsHelper::ClearAccumulators()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a5645e18de29a84c9a9b3f3105966f0c5',1,'cutlass::gemm::IgemmTraitsHelper::ClearAccumulators()']]], + ['consttensorref_5ft',['ConstTensorRef_t',['../classcutlass_1_1TensorView.html#a8ef76170bc5ba832dc01339133021830',1,'cutlass::TensorView']]], + ['coord_5ft',['Coord_t',['../classcutlass_1_1TensorView.html#a4037baf5069138ec3967810d2e185017',1,'cutlass::TensorView']]] +]; diff --git a/docs/generated-html/search/typedefs_3.html b/docs/generated-html/search/typedefs_3.html new file mode 100644 index 00000000..def60a5b --- /dev/null +++ b/docs/generated-html/search/typedefs_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_3.js b/docs/generated-html/search/typedefs_3.js new file mode 100644 index 00000000..1c82be67 --- /dev/null +++ b/docs/generated-html/search/typedefs_3.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['deleter_5ftype',['deleter_type',['../classcutlass_1_1platform_1_1unique__ptr.html#a85cab9945c36dc56bd7d6adf30c0d252',1,'cutlass::platform::unique_ptr']]], + ['delta',['Delta',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#af1f105d4712f01880b0944666e2f81ae',1,'cutlass::gemm::GemmEpilogueTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aede069e51e0732a9648c437261bd4d66',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Delta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a07bb48f99000256f04f00564a4371c2f',1,'cutlass::gemm::GemmGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#aba61fb6e93a6423ab72c082c280f5db4',1,'cutlass::gemm::GemmGlobalTileCdTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a645f65f7d8f123936b286521df470224',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#afd691b764b7d105a1ed41dada6049e71',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a2ee87510d2deccf8b9633aaa4f6340ea',1,'cutlass::gemm::GemmSharedLoadTileATraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ad029d098ba13543bf99c728e6b93006d',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a5587ef22f419ab9a7c6117917cc99c57',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#ac5578da2577cddd5a38cb628f894f644',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Delta()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#a8f
8de5a6811b77f0c721cd78a237223e',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#aed055504ec5f09657e059416150188a9',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Delta()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a08dada072eefded4c859df4e5fc25ca6',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html#ab55665f7c2f2cb8b8b9b8ac852d48002',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::Delta()'],['../structcutlass_1_1TileTraits.html#af88f5cea9f452d83004ea0fa0f9d56eb',1,'cutlass::TileTraits::Delta()'],['../structcutlass_1_1TileIteratorBase.html#a9bc6c04f4a3adeb5a29743fa43425088',1,'cutlass::TileIteratorBase::Delta()'],['../structcutlass_1_1TileLoadIterator.html#ac2a7f94723259f0d3c7b8a6d5b8778bf',1,'cutlass::TileLoadIterator::Delta()'],['../structcutlass_1_1TileStoreIterator.html#a1c433ba0eea5e6a46f36101d8de98ed0',1,'cutlass::TileStoreIterator::Delta()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a47404b4527b101e286347714aea687d5',1,'cutlass::TileTraitsStrideMajor::Delta()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#ab1a4945bf562debeee1af813288e5896',1,'cutlass::TileTraitsContiguousMajor::Delta()'],['../structcutlass_1_1TileTraitsWarpRake.html#a3ce218b223c5716af40c316899324bbe',1,'cutlass::TileTraitsWarpRake::Delta()']]] +]; diff --git a/docs/generated-html/search/typedefs_4.html b/docs/generated-html/search/typedefs_4.html new file mode 100644 index 00000000..ef733ad2 --- /dev/null +++ b/docs/generated-html/search/typedefs_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_4.js b/docs/generated-html/search/typedefs_4.js new file mode 100644 index 00000000..6415af33 --- /dev/null +++ b/docs/generated-html/search/typedefs_4.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['element',['Element',['../structcutlass_1_1Fragment.html#a9c67fa5bbd0b8b49bd6ec002dee3cbab',1,'cutlass::Fragment::Element()'],['../structcutlass_1_1FragmentIterator.html#ab4ef3c5a6b5e13224e45bbbcb9f1bc5d',1,'cutlass::FragmentIterator::Element()'],['../structcutlass_1_1FragmentConstIterator.html#ae98ab2a88342e7dbf9631cfb5cf5e706',1,'cutlass::FragmentConstIterator::Element()']]], + ['element_5ftype',['element_type',['../classcutlass_1_1platform_1_1unique__ptr.html#a94cea0ebf2ac4bec69dfa1f80ea07d50',1,'cutlass::platform::unique_ptr']]], + ['epilogue',['Epilogue',['../structcutlass_1_1gemm_1_1GemmTraits.html#a424f1ac14e1e7ad37428edd0cf13e7fe',1,'cutlass::gemm::GemmTraits::Epilogue()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a234ae6065d5ab56135e10119d3ad2d98',1,'cutlass::gemm::HgemmTraitsHelper::Epilogue()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a5e2ed697a9091a1ca8b19855b5a2c651',1,'cutlass::gemm::IgemmTraitsHelper::Epilogue()']]] +]; diff --git a/docs/generated-html/search/typedefs_5.html b/docs/generated-html/search/typedefs_5.html new file mode 100644 index 00000000..94db6d21 --- /dev/null +++ b/docs/generated-html/search/typedefs_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_5.js b/docs/generated-html/search/typedefs_5.js new file mode 100644 index 00000000..84d69abc --- /dev/null +++ b/docs/generated-html/search/typedefs_5.js @@ -0,0 +1,14 @@ +var searchData= +[ + ['false_5ftype',['false_type',['../namespacecutlass_1_1platform.html#ad8c95b2109070847b13d355120344380',1,'cutlass::platform']]], + ['fetchedfragment',['FetchedFragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0a7f6ae85cfb162b1facf24dff8bab36',1,'cutlass::gemm::GlobalLoadStreamBase::FetchedFragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a41b45085f17532a6394de3f5ccf201e7',1,'cutlass::gemm::SharedLoadStream::FetchedFragment()']]], + ['fragment',['Fragment',['../structcutlass_1_1FragmentIterator.html#afd15cbe1c9a0fd7871b12f3f3042c808',1,'cutlass::FragmentIterator::Fragment()'],['../structcutlass_1_1FragmentConstIterator.html#acac5b62b365f36f370adb0fee11cea05',1,'cutlass::FragmentConstIterator::Fragment()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a32687e2aa49dfa251eab14d5cd2036be',1,'cutlass::gemm::GlobalLoadStreamBase::Fragment()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a2180cfbb482d300472ad2993e4b555d4',1,'cutlass::gemm::GemmGlobalIteratorAb::Fragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a9f025ed2609bf33230f6a390c22b11b7',1,'cutlass::gemm::SharedLoadStream::Fragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a82dc6d9a10de7aba9a69e6025b2cc2b7',1,'cutlass::gemm::HgemmSwizzle::Fragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a67693ee79f93cb61fc37f2e632eaea8d',1,'cutlass::gemm::IgemmSwizzle::Fragment()'],['../structcutlass_1_1TileIteratorBase.html#a0d7b595d7959cc1680fc07c2e02e1c8e',1,'cutlass::TileIteratorBase::Fragment()'],['../structcutlass_1_1TileLoadIterator.html#aaf72c4897641080b1d84c0bbd8d813cc',1,'cutlass::TileLoadIterator::Fragment()'],['../structcutlass_1_1TileStoreIterator.html#a95da23108b74ad085024ab45e84083e1
',1,'cutlass::TileStoreIterator::Fragment()']]], + ['fragmenta',['FragmentA',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a1daf96b6d152c5cf32f248bbfd605b74',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a71aadbb130d4b1a6532c45282b37354f',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a69d387d932b628dc51c18fcc178c4914',1,'cutlass::gemm::ThreadMultiplyAdd::FragmentA()']]], + ['fragmentb',['FragmentB',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#ae79e7fc5be2f4c8d30ca83edc151f63a',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a43e278686b493d0aef943f32a9f47b9e',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a5429a730a1dea00dc4aecbe8e3ef1620',1,'cutlass::gemm::ThreadMultiplyAdd::FragmentB()']]], + ['fragmentconstiterator',['FragmentConstIterator',['../structcutlass_1_1TileIteratorBase.html#a25a241bbdc0b0121992019a16f1a6d60',1,'cutlass::TileIteratorBase::FragmentConstIterator()'],['../structcutlass_1_1TileLoadIterator.html#a4c7a3a4917245de8269b74bdabe16b76',1,'cutlass::TileLoadIterator::FragmentConstIterator()'],['../structcutlass_1_1TileStoreIterator.html#a48de0db7ee2ee9699b946a9d5a0364c7',1,'cutlass::TileStoreIterator::FragmentConstIterator()']]], 
+ ['fragmentelement',['FragmentElement',['../structcutlass_1_1TileIteratorBase.html#ac7cca14d54bf3f0749db1ffaea7c9ae7',1,'cutlass::TileIteratorBase::FragmentElement()'],['../structcutlass_1_1TileLoadIterator.html#a2edd89863b8035137ccd8dd3ad7be464',1,'cutlass::TileLoadIterator::FragmentElement()'],['../structcutlass_1_1TileStoreIterator.html#a2b13136a970fae187fcb377c9be28fac',1,'cutlass::TileStoreIterator::FragmentElement()']]], + ['fragmentiterator',['FragmentIterator',['../structcutlass_1_1TileIteratorBase.html#a379a52ed1128fc9f93cad35d3e3233e5',1,'cutlass::TileIteratorBase::FragmentIterator()'],['../structcutlass_1_1TileLoadIterator.html#aebbe5a0996dcd362caad618e78dc2591',1,'cutlass::TileLoadIterator::FragmentIterator()'],['../structcutlass_1_1TileStoreIterator.html#a0843b2d82422e7178f324a8d3be9d705',1,'cutlass::TileStoreIterator::FragmentIterator()']]], + ['fragmentmultiplyadd',['FragmentMultiplyAdd',['../structcutlass_1_1gemm_1_1LinearScaling.html#aa697d4eaced1ef08247aeb1fcc0f0ea8',1,'cutlass::gemm::LinearScaling']]], + ['fragmentshape',['FragmentShape',['../structcutlass_1_1FragmentIterator.html#a63ff1767c4923b0a2b6b64487306ed76',1,'cutlass::FragmentIterator::FragmentShape()'],['../structcutlass_1_1FragmentConstIterator.html#a880f12d0cd42cdae7ce6009d2233f577',1,'cutlass::FragmentConstIterator::FragmentShape()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#afe44fedcf24b90c0cf6ac7d1495b89e4',1,'cutlass::gemm::HgemmSwizzle::FragmentShape()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a13a3b052cd8b714471489a9cc4dc7004',1,'cutlass::gemm::IgemmSwizzle::FragmentShape()'],['../structcutlass_1_1TileIteratorBase.html#a14f4b356c9cd320e6e7b451edbf58c24',1,'cutlass::TileIteratorBase::FragmentShape()'],['../structcutlass_1_1TileLoadIterator.html#a7c27a7b0d8593b002eca186c15fdc869',1,'cutlass::TileLoadIterator::FragmentShape()'],['../structcutlass_1_1TileStoreIterator.html#a3b872e85844c9e009fa480a71a829136',1,'cutlass::TileStoreIterator::FragmentShape()']]], + 
['functor',['Functor',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a6c30bea1b2a1bd2e981025851d5b12d1',1,'cutlass::gemm::GemmEpilogue::Functor()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a7cdb30f17692e8fdb3dd4cf4c0b8e9ee',1,'cutlass::gemm::GemmEpilogueTraits::Functor()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a981134cf87d85aa28570a62d9e878b10',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Functor()']]] +]; diff --git a/docs/generated-html/search/typedefs_6.html b/docs/generated-html/search/typedefs_6.html new file mode 100644 index 00000000..bda8ea1c --- /dev/null +++ b/docs/generated-html/search/typedefs_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_6.js b/docs/generated-html/search/typedefs_6.js new file mode 100644 index 00000000..84c6585c --- /dev/null +++ b/docs/generated-html/search/typedefs_6.js @@ -0,0 +1,23 @@ +var searchData= +[ + ['gemmconfig',['GemmConfig',['../structcutlass_1_1gemm_1_1GemmTraits.html#a4efe5d156abca056ef8b5334fb574dd5',1,'cutlass::gemm::GemmTraits::GemmConfig()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a1597c776238f35bcb1acc0a8f8f9c118',1,'cutlass::gemm::HgemmTraitsHelper::GemmConfig()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#af10aebe7ca4e24cce435ac4cd60e7bac',1,'cutlass::gemm::IgemmTraitsHelper::GemmConfig()']]], + ['gemmepiloguetraits',['GemmEpilogueTraits',['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a4a0f361b5c47d0ab5f3308cd3b3b6ef6',1,'cutlass::gemm::HgemmTraitsHelper']]], + ['gemmtiletraitshelpera',['GemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a5557c86a530f5d20a35d3fa620adf417',1,'cutlass::gemm::HgemmTraitsHelper::GemmTileTraitsHelperA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ab9e10d54c81a359db0eba58a11b9a0cf',1,'cutlass::gemm::IgemmTraitsHelper::GemmTileTraitsHelperA()']]], + ['gemmtiletraitshelperb',['GemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a8768c2b03bea0c3601c47dde2bc7ca89',1,'cutlass::gemm::HgemmTraitsHelper::GemmTileTraitsHelperB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a095505bfcea6791accd06bf4d37b9df8',1,'cutlass::gemm::IgemmTraitsHelper::GemmTileTraitsHelperB()']]], + ['globalfragmentc',['GlobalFragmentC',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad8e5337f3d19437e9c4cafcfcc3e3d3e',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['globalfragmentd',['GlobalFragmentD',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a723cd69ee4d5c26579b36e02c531ea88',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + 
['globaliterator',['GlobalIterator',['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a56d3f2606f9464ec57aa61aae378c642',1,'cutlass::gemm::HgemmSwizzle::GlobalIterator()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a880878914c25db44a1781725c24af514',1,'cutlass::gemm::IgemmSwizzle::GlobalIterator()']]], + ['globalloaditeratora',['GlobalLoadIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a5687850f235d644a4820851880740d27',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ab8a3def34300afb5745453d0b33204aa',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ac7ee33e683e48511a1a220df6c9d4758',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadIteratorA()']]], + ['globalloaditeratorb',['GlobalLoadIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a362794738bc14b283a91558bcadbbfd5',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a95559f28cab076da723e4cb24351116e',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a3a6d816852cca926afa08103f754477b',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadIteratorB()']]], + 
['globalloaditeratorc',['GlobalLoadIteratorC',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#aecb5429363c7156ee3ad596fe250120a',1,'cutlass::gemm::GemmEpilogue::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a8409d84ee282a4d6953bd41149d8b9c2',1,'cutlass::gemm::GemmEpilogueTraits::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aeea13630bb281834b717f8d9d13a9319',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a24826f99d097eea0298e6be12a6327b9',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalLoadIteratorC()']]], + ['globalloadstreama',['GlobalLoadStreamA',['../structcutlass_1_1gemm_1_1GemmTraits.html#a9cd6c3fddfb4315eb52b672900462c47',1,'cutlass::gemm::GemmTraits::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a448c242880183e006b70d839d210a2ec',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a2aaece6093100c71c4d587994200e3bb',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a7fb1354154f303642da72e6fd157d846',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadStreamA()']]], + ['globalloadstreamb',['GlobalLoadStreamB',['../structcutlass_1_1gemm_1_1GemmTraits.html#ac393b07e780629fc8254fc22cc6f815b',1,'cutlass::gemm::GemmTraits::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#aad467ed9a680b4d77acecb096799cd89',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#abaf5f16ab0b215b406766ecadab29394',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a88e66ee760aea03687e7b3ccc6ea535b',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadStreamB()']]], + 
['globalloadtiletraits',['GlobalLoadTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a94f00f94a88588522ca3f9f0197a5a9b',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalLoadTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#aaa009025dcd6360ead1dc18005688821',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalLoadTileTraits()']]], + ['globalstoreiteratord',['GlobalStoreIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a1c766374d900535c944cf2a2de6925f4',1,'cutlass::gemm::GemmEpilogue::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#aeef5745d149770c9f79e12f6d97ffce1',1,'cutlass::gemm::GemmEpilogueTraits::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a23be7b4b498c17f9235a2b4896f1bffb',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad3e937c15bfac443b0e3b94d702f46b2',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalStoreIteratorD()']]], + ['globalstoretiletraits',['GlobalStoreTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a16d7df2934c3c59d9b8f36f7a2137aee',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a16b06a1611dbd22adaa0c9ee5e1b15bd',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalStoreTileTraits()']]], + ['globaltiletraits',['GlobalTileTraits',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#adc95f4a8617cdf28e5b5d7d2d1aefec2',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a8160a260acce2362e90d43bce733c69d',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ 
>::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a5fee0ed52326c0685e8d8295e40ce064',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#afbc41e7b98097b153fd27a48f073a877',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a36e082b2da22d17eeb73af6bd0632314',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a1e6356bf5c87271ab9794fcc79edc145',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a738774d1eb79de7e29c372ddfd48258d',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a24f38105e3c331c733cb672c3a9be588',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()']]], + 
['globaltransformera',['GlobalTransformerA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#af9a98d39d6959a9641f7c3c90df2f98e',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalTransformerA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a3fb86b6d3e353df6b752510d64c5e647',1,'cutlass::gemm::HgemmTraitsHelper::GlobalTransformerA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a23bb732b7237bcabe3667408f288844d',1,'cutlass::gemm::IgemmTraitsHelper::GlobalTransformerA()']]], + ['globaltransformerb',['GlobalTransformerB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a437070ba4a214aee363315d6019e450c',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalTransformerB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a7b4de712868095200a338802c1fbb3de',1,'cutlass::gemm::HgemmTraitsHelper::GlobalTransformerB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a600bcc571ea5e04a98663c134d4664b9',1,'cutlass::gemm::IgemmTraitsHelper::GlobalTransformerB()']]], + ['globaltransformerc',['GlobalTransformerC',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a41edfd24b7dd2759f8b72ae8534182a9',1,'cutlass::gemm::GemmEpilogue::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a051f25a4aa3ea71ff400582228adbdaa',1,'cutlass::gemm::GemmEpilogueTraits::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a0682b61d1a1a951026ff026bff9361bb',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad0116b2e7b2ca1526246e2ff7e73fd2f',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalTransformerC()']]], + 
['globaltransformerd',['GlobalTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a32f618ff19d984447fba7355d46a69a7',1,'cutlass::gemm::GemmEpilogue::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a261e526c6a8e832bc483bf4e486cc9d7',1,'cutlass::gemm::GemmEpilogueTraits::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ae96c5a3d58dc7a95543f8749f762ca43',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a880293ef6a48a0f4941c8f984c36f591',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalTransformerD()']]] +]; diff --git a/docs/generated-html/search/typedefs_7.html b/docs/generated-html/search/typedefs_7.html new file mode 100644 index 00000000..565b233f --- /dev/null +++ b/docs/generated-html/search/typedefs_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_7.js b/docs/generated-html/search/typedefs_7.js new file mode 100644 index 00000000..ec923548 --- /dev/null +++ b/docs/generated-html/search/typedefs_7.js @@ -0,0 +1,11 @@ +var searchData= +[ + ['igemmconfig',['IgemmConfig',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a5a52727bb9b5d5f8afa7d0384f564036',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['immediateoffsetstrides',['ImmediateOffsetStrides',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#abc47717230ddde3edc88d2770f6841bf',1,'cutlass::gemm::GemmGlobalTileTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a14e9713b0cd34af433c3cae9b283b54c',1,'cutlass::gemm::GemmGlobalTileCdTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a027bebceeda2287b40915ffd95d494a7',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a39414f484da7f993bc96d61c97273614',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a8e767b5e2fb95b0b02a0ea3e8ea58368',1,'cutlass::gemm::GemmSharedLoadTileATraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a5e4204b52ee081a37e824ca71c291c03',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ac585815d08290d9a5a9cdbd611ffdac4',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a9cfb32f902593e7dc018ee802c3520b8',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af53d49bad7060b87a2761fe8a82a7ddd',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::ImmediateOffsetStrides()'],
['../structcutlass_1_1TileIteratorBase.html#a561ceb1093b28b8dce67df0129b7b8b8',1,'cutlass::TileIteratorBase::ImmediateOffsetStrides()']]], + ['index',['Index',['../structcutlass_1_1gemm_1_1Gemm.html#a0aca711d07245f3071adeb1111fedd34',1,'cutlass::gemm::Gemm::Index()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a07c93d583bfddd8f916fba6ef809832e',1,'cutlass::gemm::GemmEpilogue::Index()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#ab430d05bd17efd60c28077c87b5ca331',1,'cutlass::gemm::GemmEpilogueTraits::Index()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a6a6e38022606dd8d41cf7264fb059cc2',1,'cutlass::gemm::GlobalLoadStreamBase::Index()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a7ff9cae930c8a6bb9c8ee6d81cb1953f',1,'cutlass::gemm::GemmGlobalIteratorAb::Index()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a56847e834b31b88544093c3df54d299f',1,'cutlass::gemm::GemmGlobalIteratorCd::Index()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#ae67227cecbe84f5c8497d9a7ff82b367',1,'cutlass::gemm::GemmTraits::Index()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a3f45216454a550a116935aede0bda3de',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Index()'],['../structcutlass_1_1TileIteratorBase.html#a44665808adfd69df0d26cec4b1840cc3',1,'cutlass::TileIteratorBase::Index()'],['../structcutlass_1_1TileLoadIterator.html#aaa83f05e0cb3204053c3ee1da036cd36',1,'cutlass::TileLoadIterator::Index()'],['../structcutlass_1_1TileStoreIterator.html#a5ac2280dfcac08cec17b8c0db1c4593e',1,'cutlass::TileStoreIterator::Index()']]], + ['inputfragment',['InputFragment',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#ac7906301019c3e6d60985c3851f1e95e',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > 
>::InputFragment()'],['../structcutlass_1_1Copy.html#aed254bbc1ad94ed9d335ab02f199ceb1',1,'cutlass::Copy::InputFragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ab5fab63d83eb0444c08bda16491d2627',1,'cutlass::gemm::HgemmSwizzle::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#aa9a4b05f9fc28b80a4ae4aabb2ce1e8c',1,'cutlass::gemm::IgemmFloatToInt8Converter::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a702ca51abc077355a2d7343976a0cfdb',1,'cutlass::gemm::IgemmInt8ToFloatConverter::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a24a0bd5a9251ba5204b35eb4c4ac7727',1,'cutlass::gemm::IgemmSwizzle::InputFragment()']]], + ['instructionshape',['InstructionShape',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#ac93ba536992debeae86087e638167a13',1,'cutlass::gemm::FragmentMultiplyAdd::InstructionShape()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#ab16a3d8adda89cc4f9765116ea75a4b7',1,'cutlass::gemm::FragmentMultiplyAdd< half >::InstructionShape()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a3a57d05f50932d718538f0d1ededa95b',1,'cutlass::gemm::GemmConfig::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#aa56cdefa659af5ce4efd493b94bafdfd',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#ad73372a37315b0c17a8db21e40a78574',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ac6381210d447fda9b0e9a028d167f22b',1,'cutlass::gemm::ThreadMultiplyAdd::InstructionShape()']]], + 
['iterations',['Iterations',['../structcutlass_1_1FragmentIterator.html#a4324ae522c6463e66a64f05d2e58b5f0',1,'cutlass::FragmentIterator::Iterations()'],['../structcutlass_1_1FragmentConstIterator.html#a527100e34ed700787b1419157710dbb2',1,'cutlass::FragmentConstIterator::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a8e3c978da6ed56239783bf4db0a936ae',1,'cutlass::gemm::GemmEpilogue::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#ab00969bdda930eeb7b82985c476adf7d',1,'cutlass::gemm::GemmEpilogueTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ad7b23352072b1509d3383ee775756d2a',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Iterations()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aaf6410f99d7f995792d0ac34efd3a82f',1,'cutlass::gemm::GemmGlobalTileTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a72eebc18d31900db57fa77508016f64a',1,'cutlass::gemm::GemmGlobalTileCdTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a6125e052e47296c3ef53c8a149ffd31b',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a025445699c5c86237d8c3e48f01081ea',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#ae96e490d38ade6db4d853fb6c8f3378b',1,'cutlass::gemm::GemmSharedLoadTileATraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a27bc06b72a94e34d5da6fbfb950459b5',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a6bacc866485330f80596f634e6d14336',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a81ca35e0c5d9553d1dccc981cbd89d47',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Iterations()'],['../structcutlass_1_1gemm_1_1
HgemmCrosswiseGlobalTileTraits.html#aa9b46937bea47d071d277aa212dd610b',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Iterations()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a0b9b2b7838cb13a61a16501a2662fa51',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Iterations()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a9fb4b56091d4458ebd82130bc3951e5b',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Iterations()'],['../structcutlass_1_1PredicateTileAdapter.html#a1f2d52eec9f488c2a53c4d62af824450',1,'cutlass::PredicateTileAdapter::Iterations()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#a5e461e0eb376de60605a6ab5fdc38058',1,'cutlass::ConstPredicateTileAdapter::Iterations()'],['../structcutlass_1_1TileTraits.html#af7ae2fdb4c8f1702169cc7d437d2b469',1,'cutlass::TileTraits::Iterations()'],['../structcutlass_1_1TileIteratorBase.html#a352ed0773b37f03bf68e4b6cf9899474',1,'cutlass::TileIteratorBase::Iterations()'],['../structcutlass_1_1TileLoadIterator.html#a9720b1e4a10c2d5aa85f9a9c66a31bbf',1,'cutlass::TileLoadIterator::Iterations()'],['../structcutlass_1_1TileStoreIterator.html#a552a67fb03c28e985d143f6193f88308',1,'cutlass::TileStoreIterator::Iterations()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a03a32694da75bb95422c6b550e3324e2',1,'cutlass::TileTraitsStrideMajor::Iterations()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a425a20b642ae8736c12626b2de9b8b82',1,'cutlass::TileTraitsContiguousMajor::Iterations()'],['../structcutlass_1_1TileTraitsWarpRake.html#a410e44aa83f2179152a48f7aceb05323',1,'cutlass::TileTraitsWarpRake::Iterations()']]], + ['iterationsstrides',['IterationsStrides',['../structcutlass_1_1FragmentConstIterator.html#ab683796885f3bae3765efd96883f311b',1,'cutlass::FragmentConstIterator']]], + ['iterator',['Iterator',['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a6925270c4ad157554ab155cddc7b46e6',1,'cutlass::gemm::SharedLoadStream']]] +]; diff --git 
a/docs/generated-html/search/typedefs_8.html b/docs/generated-html/search/typedefs_8.html new file mode 100644 index 00000000..3063e032 --- /dev/null +++ b/docs/generated-html/search/typedefs_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_8.js b/docs/generated-html/search/typedefs_8.js new file mode 100644 index 00000000..e54f847b --- /dev/null +++ b/docs/generated-html/search/typedefs_8.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['loaditerator',['LoadIterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#acff2a1ab180eec672714cd587a28f9fe',1,'cutlass::gemm::GlobalLoadStreamBase']]] +]; diff --git a/docs/generated-html/search/typedefs_9.html b/docs/generated-html/search/typedefs_9.html new file mode 100644 index 00000000..9c978f7a --- /dev/null +++ b/docs/generated-html/search/typedefs_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_9.js b/docs/generated-html/search/typedefs_9.js new file mode 100644 index 00000000..76a1247d --- /dev/null +++ b/docs/generated-html/search/typedefs_9.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['multiplicandtraits',['MultiplicandTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a21a3524edaf002b5e5878df3c7eae7e7',1,'cutlass::gemm::GemmGlobalTileTraits']]], + ['multiplyadd',['MultiplyAdd',['../structcutlass_1_1gemm_1_1GemmConfig.html#a8669096ddbb8c810fb8d2313d62e6ee7',1,'cutlass::gemm::GemmConfig::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#af810544e956b04830c5be7ce41d3b45c',1,'cutlass::gemm::GemmTraits::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ae9facf63912d98e597883bf7efb56cc8',1,'cutlass::gemm::HgemmTraitsHelper::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a87e34d56fa955670331749724bee9fd8',1,'cutlass::gemm::IgemmTraitsHelper::MultiplyAdd()']]], + ['multiplyaddscalar',['MultiplyAddScalar',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a19fb8c9b9a77aebec507635de7da6f21',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#afac6f7a62b24396ea6861e6fd10779cc',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a42dd312d4cf5bb53b472389897f9deeb',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aad14588b1515e37ede24915f589d32ab',1,'cutlass::gemm::GemmTileTraitsHelperB< 
MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar()']]] +]; diff --git a/docs/generated-html/search/typedefs_a.html b/docs/generated-html/search/typedefs_a.html new file mode 100644 index 00000000..426df905 --- /dev/null +++ b/docs/generated-html/search/typedefs_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_a.js b/docs/generated-html/search/typedefs_a.js new file mode 100644 index 00000000..be59a69a --- /dev/null +++ b/docs/generated-html/search/typedefs_a.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['no',['no',['../structcutlass_1_1platform_1_1is__base__of__helper.html#ae096aa6c67f60d8d9c5a4b084118a8af',1,'cutlass::platform::is_base_of_helper']]] +]; diff --git a/docs/generated-html/search/typedefs_b.html b/docs/generated-html/search/typedefs_b.html new file mode 100644 index 00000000..fe314a13 --- /dev/null +++ b/docs/generated-html/search/typedefs_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_b.js b/docs/generated-html/search/typedefs_b.js new file mode 100644 index 00000000..90d2ef43 --- /dev/null +++ b/docs/generated-html/search/typedefs_b.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['offset_5ft',['Offset_t',['../classcutlass_1_1TensorView.html#a215946fb080a5253815feb1f639c8f6f',1,'cutlass::TensorView']]], + ['outputfragment',['OutputFragment',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a8ef69ab595489e142911e8e240fb405a',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::OutputFragment()'],['../structcutlass_1_1Copy.html#a545be6c284d625b0841a10cc9126e14a',1,'cutlass::Copy::OutputFragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a9c04f0b0eb0293325f661b72168d4fa8',1,'cutlass::gemm::HgemmSwizzle::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a3d89bfc0d94cd695cbe4a61859e5e553',1,'cutlass::gemm::IgemmFloatToInt8Converter::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a66ac385a1cd771b95f70ee36cd74e8f7',1,'cutlass::gemm::IgemmInt8ToFloatConverter::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#ac0a4e31e95f8e0c77ae087284bb02ff8',1,'cutlass::gemm::IgemmSwizzle::OutputFragment()']]], + 
['outputtile',['OutputTile',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a92a135fac401d43a8d2f14982d90274b',1,'cutlass::gemm::GemmEpilogue::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#aed1bd9df5ff579ba3e36ae5ba781c075',1,'cutlass::gemm::GemmEpilogueTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ac30a062bed1a65e45961c4f301b69101',1,'cutlass::gemm::GemmEpilogueTraitsHelper::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ad52b81080731ee1f0d3c2c7eaba6f60d',1,'cutlass::gemm::GemmSharedStoreTileDTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#acb16feebdcad5bbebe9d4d3383c37899',1,'cutlass::gemm::GemmSharedLoadTileDTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a53450f4d7444d6a4c0d2353496c0a4fd',1,'cutlass::gemm::GemmConfig::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a97d7ee63e5d180410b370f095648f367',1,'cutlass::gemm::GemmTraits::OutputTile()']]] +]; diff --git a/docs/generated-html/search/typedefs_c.html b/docs/generated-html/search/typedefs_c.html new file mode 100644 index 00000000..3a6a4a76 --- /dev/null +++ b/docs/generated-html/search/typedefs_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_c.js b/docs/generated-html/search/typedefs_c.js new file mode 100644 index 00000000..7807c3a1 --- /dev/null +++ b/docs/generated-html/search/typedefs_c.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['params',['Params',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ae5209fa80705442693833c63d535161e',1,'cutlass::gemm::GemmEpilogue']]], + ['pointer',['pointer',['../classcutlass_1_1platform_1_1unique__ptr.html#ab6ce60d03d11b269c1e151dfa7c696f9',1,'cutlass::platform::unique_ptr::pointer()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#adcbf24c1b7f45ab5fe8f3ad94154b4d1',1,'cutlass::gemm::GlobalLoadStreamBase::Pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a3ff6f630b6b317ace1cf6e13fdf3a0cd',1,'cutlass::gemm::GemmGlobalTileTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a3abcfa68ae9904a13195d32d6e6c4bc6',1,'cutlass::gemm::GemmGlobalIteratorCd::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a5be0c995c57faafaad7ae55ae015fc00',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#ab883c2a8b90262152faca9cabe515dc4',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#adc4946dfbe914140c6852d0c05b30864',1,'cutlass::gemm::GemmSharedLoadTileATraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#afafb3d9ae470c8ef56ec4ca5e66e2182',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a20471c2f569c28538dad8a220ab25624',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a1e72b69cf2147e4d194893a64417b920',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Pointer()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a84a73da2a07210fcfad108
53b941c85e',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Pointer()'],['../structcutlass_1_1TileLoadIterator.html#a5a179e148ccd770e1703f288624fa9b8',1,'cutlass::TileLoadIterator::Pointer()']]], + ['predicatevector',['PredicateVector',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a3dd74f6e12339a87c0eb8f75fbdc7b9c',1,'cutlass::gemm::GemmGlobalIteratorAb::PredicateVector()'],['../structcutlass_1_1PredicateTileAdapter.html#a72669300eb0bd18ea8124f780862a0e4',1,'cutlass::PredicateTileAdapter::PredicateVector()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#ab9143288811a1262f7007f1b76b32e8f',1,'cutlass::ConstPredicateTileAdapter::PredicateVector()'],['../structcutlass_1_1TileIteratorBase.html#a7ab46a9210b421d32af4d1394892cfd5',1,'cutlass::TileIteratorBase::PredicateVector()'],['../structcutlass_1_1TileLoadIterator.html#a64ae02b44f275ef2f016949aec769328',1,'cutlass::TileLoadIterator::PredicateVector()'],['../structcutlass_1_1TileStoreIterator.html#a5aa507eaeb63951f8e69fb223ec41809',1,'cutlass::TileStoreIterator::PredicateVector()']]] +]; diff --git a/docs/generated-html/search/typedefs_d.html b/docs/generated-html/search/typedefs_d.html new file mode 100644 index 00000000..8c3b81fd --- /dev/null +++ b/docs/generated-html/search/typedefs_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_d.js b/docs/generated-html/search/typedefs_d.js new file mode 100644 index 00000000..b5733656 --- /dev/null +++ b/docs/generated-html/search/typedefs_d.js @@ -0,0 +1,31 @@ +var searchData= +[ + ['scalar',['Scalar',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a0d38914bf97084e04102e7897aee4295',1,'cutlass::gemm::GemmEpilogue::Scalar()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a006e50cf5fb67407d41c60d6d08b8b66',1,'cutlass::gemm::GemmEpilogueTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ae2b82b9b62aefa15005091bb84ac20e8',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Scalar()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afbbf15a7b5e4c38e59bf1debf67f04d6',1,'cutlass::gemm::GlobalLoadStreamBase::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a6894b653fffa59bcb847bc3295643d6b',1,'cutlass::gemm::GemmGlobalTileTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a5817b81c7013db9a3f7394ad4b1db79a',1,'cutlass::gemm::GemmGlobalIteratorAb::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6b5b207eb1147e9669215e192901df9e',1,'cutlass::gemm::GemmGlobalIteratorCd::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a8b04fd003fc2db46d749360e8838438b',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#aaa439a0bb6b9de5e2722ea7b011effea',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a1b6956adc65254202864520b668edd14',1,'cutlass::gemm::GemmSharedLoadTileATraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a2a6065e583155b3e389253d3bfb64d73',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a9a2218b570dada2f1e3ccd8004c47856',1,'cutlass::gemm::
GemmSharedStoreTileDTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a1b025cb056729706f36469e74a9799dc',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af511f0ff83166b2a77d4cad4150c8e8f',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ac618881d66790e4c280dc5692e5ddf95',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a8ae7db3f2f0c57779729d500386c004c',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a7639ccd7f6419a9f232db173a228e756',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ae4128bba3f1df6ef7824e2db79745b00',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html#ab1068ba72468f9ede1d05ba41ea31317',1,'cutlass::gemm::IgemmEpilogueScalar::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html#a0983fd25494f6a7ed5af37a02e99f650',1,'cutlass::gemm::IgemmEpilogueScalar< int 
>::Scalar()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#ae6b053ca059932f7c0d3c99243854183',1,'cutlass::gemm::LinearScaling::Scalar()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab9979f3f1f6d31e1466780c5777de25e',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Scalar()'],['../structcutlass_1_1TileIteratorBase.html#a17163e93d7d3616b4950925f72bb4c16',1,'cutlass::TileIteratorBase::Scalar()'],['../structcutlass_1_1TileLoadIterator.html#ae8dff52e619f06fbdbca8cb847c79895',1,'cutlass::TileLoadIterator::Scalar()'],['../structcutlass_1_1TileStoreIterator.html#ad52318b430437575b55099ca992ca3a7',1,'cutlass::TileStoreIterator::Scalar()'],['../unioncutlass_1_1Vector.html#a56875d7cbf921261e68e1f63212db5bd',1,'cutlass::Vector::Scalar()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a03199df1287d263f7267239c014f1d9b',1,'cutlass::Vector< half, kLanes_ >::Scalar()'],['../structcutlass_1_1VectorTraits.html#ab3b49d7fb52050c13e50e3c75bf72599',1,'cutlass::VectorTraits::Scalar()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aaf35570b10829356762dcec925a5b4bc',1,'cutlass::VectorTraits< Vector< T, Lanes > >::Scalar()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a6e99dde8432b13472971dc41573a574e',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::Scalar()']]], + ['scalara',['ScalarA',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a6fa76b3e7ac721d47df47eba4e9ef222',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarA()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a366083b229b28e7f44da38273b2ab263',1,'cutlass::gemm::FragmentMultiplyAdd< half 
>::ScalarA()'],['../structcutlass_1_1gemm_1_1Gemm.html#a6fcf9daef57558e1bb932c6eba99721b',1,'cutlass::gemm::Gemm::ScalarA()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a9d1e4e364be8fd9de5e1199d93ad76aa',1,'cutlass::gemm::GemmConfig::ScalarA()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a96d64bdc48db4971798b620d6b49b3f6',1,'cutlass::gemm::GemmTraits::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a236a408791a38358cbadf19dd0e8ed9f',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#aeef5fa0437b4ce1c2e8ac4bc7e062b65',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a382242001b4c8e18ea5f2de724902217',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarA()']]], + ['scalarb',['ScalarB',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#af4f5c4a79c447e5aaf313878eca022cb',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarB()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#af52ec4b92a3e788169764014aebb85a1',1,'cutlass::gemm::FragmentMultiplyAdd< half >::ScalarB()'],['../structcutlass_1_1gemm_1_1Gemm.html#ae6f11bb666c2c8510e99200a2c0fc2f4',1,'cutlass::gemm::Gemm::ScalarB()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#aa13d6f5e5ad907ef09c88ae49e6e8e9b',1,'cutlass::gemm::GemmConfig::ScalarB()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#aa0e8fd28f5247764dfb7843f7670c698',1,'cutlass::gemm::GemmTraits::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#ac7557562de1108bf1abc10829c83e88f',1,'cutlass::gemm::ThreadMultiplyAdd< 
AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#aaf9e4b8b16150a6ad826c228af2bf103',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a42d181e7f4d0d0a15e1c911d3498b767',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarB()']]], + ['scalarc',['ScalarC',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a92c1ffbfb479cd9fa2c2632ef8e347d3',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarC()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#af553be8ef0b4dc9bb593d98dfce8628d',1,'cutlass::gemm::FragmentMultiplyAdd< half >::ScalarC()'],['../structcutlass_1_1gemm_1_1Gemm.html#a71f0c91768a1a87e94030c8c2db51e55',1,'cutlass::gemm::Gemm::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#abb0741601652df8fdf927d49c2c0e4d0',1,'cutlass::gemm::GemmEpilogue::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#abf97949c238d72854225c1c6131b5cbc',1,'cutlass::gemm::GemmEpilogueTraits::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#ad8f262d7da093d07cdd5c6a4fd9aceea',1,'cutlass::gemm::GemmConfig::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a8f78d4a68817760099081523aa7fd443',1,'cutlass::gemm::GemmTraits::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#af1a6d91d4734683ea791bf57f3c3bbb0',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#acdd554e996a712ff62eb70d6ecf8e116',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, 
int8_t, int >::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a1af758cb98c33060462a2706856b0a01',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarC()']]], + ['scalard',['ScalarD',['../structcutlass_1_1gemm_1_1Gemm.html#ae2aa3663f9f6f5708e816dcf7cd66694',1,'cutlass::gemm::Gemm::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a4887b56a96694ce6350db77f78bb505f',1,'cutlass::gemm::GemmEpilogue::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a1ee74d6f89b044578e1cd6dd210ce5fe',1,'cutlass::gemm::GemmEpilogueTraits::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a188ef7f4c49ff2830753218343a1b8f8',1,'cutlass::gemm::GemmConfig::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a3129be75ee087603170f8367e10e070e',1,'cutlass::gemm::GemmTraits::ScalarD()']]], + ['scalarepilogue',['ScalarEpilogue',['../structcutlass_1_1gemm_1_1Gemm.html#a9349fc5f20215c1c6508e250b0b4e936',1,'cutlass::gemm::Gemm']]], + ['shape',['Shape',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a89f1d9599b418c8bb81c104ca86cf00e',1,'cutlass::gemm::GemmMultiplicandTraits::Shape()'],['../structcutlass_1_1ShapeScale.html#aae9cfc35c517cd89018e4f914acbac29',1,'cutlass::ShapeScale::Shape()'],['../structcutlass_1_1ShapeAdd.html#ad4712a1339445038949445de1dd74e71',1,'cutlass::ShapeAdd::Shape()'],['../structcutlass_1_1ShapeSub.html#a24b6dd8cb6171b85c4e2f37407f9a5c9',1,'cutlass::ShapeSub::Shape()'],['../structcutlass_1_1ShapeMul.html#a8875fc5e861339f981360ed774e8cc94',1,'cutlass::ShapeMul::Shape()'],['../structcutlass_1_1ShapeDiv.html#a108ded386ef6708afc6fe769a77a234b',1,'cutlass::ShapeDiv::Shape()'],['../structcutlass_1_1ShapeMax.html#ad566aceac2563024982eeabb78c6c961',1,'cutlass::ShapeMax::Shape()'],['../structcutlass_1_1ShapeMin.html#a5c813e4c34ea612431d31b36120f8549',1,'cutlass::ShapeMin::Shape()'],['../structcutlass_1_1ShapeStrides.html#ac6fcda9b8e1782f24c1e6d67cd880a6a',1,'cutlass::ShapeStrides::Shape()']]], + 
['sharedloaditeratora',['SharedLoadIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a365aed4c0e2ad1bffea517ee36998557',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a1bbb198a50b5f01a0502df44bb678620',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#aa93043ac87d89ce7fb991c9195c3bf99',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadIteratorA()']]], + ['sharedloaditeratorb',['SharedLoadIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a4de905aadc734df69fd0db83f01be56e',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a8d09409973094ca2a17633776a64a303',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a42322b9b10e894fe157e527b378c59f8',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadIteratorB()']]], + ['sharedloaditeratord',['SharedLoadIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a4a0b439f8a57d8e67174ecbd96183070',1,'cutlass::gemm::GemmEpilogue::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a9822fa405b32cc2f471c9fdd37585cb5',1,'cutlass::gemm::GemmEpilogueTraits::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#adbff60de6f90ef4d5ae0c7096692e2c0',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad33ee44527a7fcfd41b4e677927fd4fa',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedLoadIteratorD()']]], + 
['sharedloadstreama',['SharedLoadStreamA',['../structcutlass_1_1gemm_1_1GemmTraits.html#ae01371eb31b88fa83c4926564cecafdc',1,'cutlass::gemm::GemmTraits::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#aa5ebe3a857b55412a86ec65ad1c55dd8',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a21c860cc877df13d22dd30eeb5e2b06b',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a70063eb7e19921efef55a6f32562773f',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadStreamA()']]], + ['sharedloadstreamb',['SharedLoadStreamB',['../structcutlass_1_1gemm_1_1GemmTraits.html#acaeb27063a444e2a3b93f3cb70e3c290',1,'cutlass::gemm::GemmTraits::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a12447ce4d11601a625662f9d177cc3d8',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ac5eeca1e91f0e0d4dd48d432d5213215',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a54e8ad5874306a3764951a9791f02c96',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadStreamB()']]], + ['sharedloadtiletraits',['SharedLoadTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ab8ba28fd1da48fcabbafc0de91281b46',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af534fc5698513af3c6724b68ae03316d',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a1125408805bc697755f2b16594c6c8e1',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ 
>::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a118bb34a6f58c3e5a989773b4b597d8c',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a9335aca8b152ff1167763de8ff8fb882',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a458cbcc16fc296d024f2a1a95fb926c1',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af1bc7f7c26db3399201cd95f35a56790',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a851113bffb5b656c5c649845852b3b8d',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedLoadTileTraits()']]], + ['sharedloadtransformerd',['SharedLoadTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a132cabbc1402c87c7b35dea427001a13',1,'cutlass::gemm::GemmEpilogue']]], + 
['sharedstorage',['SharedStorage',['../structcutlass_1_1gemm_1_1Gemm.html#ad10627d508fad0efae1fb91b26d7a6b7',1,'cutlass::gemm::Gemm::SharedStorage()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac36dad8a7b6bc7fc6ef88e44068468dc',1,'cutlass::gemm::GemmEpilogue::SharedStorage()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a22c671494d487511c71f2b0f26fdb404',1,'cutlass::gemm::SharedLoadStream::SharedStorage()'],['../structcutlass_1_1TileLoadIterator.html#ab457bd7953af9ef418510f55f52d1f39',1,'cutlass::TileLoadIterator::SharedStorage()'],['../structcutlass_1_1TileStoreIterator.html#ab7922305d47b67e6cfb439e4e8d9f09b',1,'cutlass::TileStoreIterator::SharedStorage()']]], + ['sharedstorefragmentd',['SharedStoreFragmentD',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a5e64440830b36899f9c0ed8b369665c8',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['sharedstoreiteratora',['SharedStoreIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a3a20852daeb46c625b2391d078b30d73',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedStoreIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a7f022d423d42d4081cefa7eb26b4d5b4',1,'cutlass::gemm::HgemmTraitsHelper::SharedStoreIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ae187303a8da63f36960687a4730f4c46',1,'cutlass::gemm::IgemmTraitsHelper::SharedStoreIteratorA()']]], + ['sharedstoreiteratorb',['SharedStoreIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a43713f534798b1e27c4ba38b72e63c08',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedStoreIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#abe3383e7338c08841fd8f0bfb1090448',1,'cutlass::gemm::HgemmTraitsHelper::SharedStoreIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a4d6658f3a3b53760b10a3da9c807b81f',1,'cutlass::gemm::IgemmTraitsHelper::SharedStoreIteratorB()']]], + 
['sharedstoreiteratord',['SharedStoreIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#aab0a964efe223c5c29bc816c393b5a9a',1,'cutlass::gemm::GemmEpilogue::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a74f4beb86447f6b613e9b60234cb27bc',1,'cutlass::gemm::GemmEpilogueTraits::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a02a517fd246fb961727d3bd1b4f954be',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#af7024128202d642d3535e1ae5cf5f43d',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreIteratorD()']]], + ['sharedstorestorage',['SharedStoreStorage',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a69092e298d5723028fc24235d72f87fa',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['sharedstorestoragea',['SharedStoreStorageA',['../structcutlass_1_1gemm_1_1GemmTraits.html#a8d49ad32fc9d8c14f6141690962c3f9c',1,'cutlass::gemm::GemmTraits']]], + ['sharedstorestorageb',['SharedStoreStorageB',['../structcutlass_1_1gemm_1_1GemmTraits.html#a438b80cd8d8df0e74014ae47a162f7ed',1,'cutlass::gemm::GemmTraits']]], + ['sharedstoretiletraits',['SharedStoreTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a3a0fb3a914bfd009ff2e3918bcd231a9',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#aaa198fed841af6bf26bf2e9544d0a877',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ad6511b7c2d84a9f6c3ed3639269ac44f',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ 
>::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a1884cbc21987aec651fa8149d4ed1a06',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#acbeea56f0ce95ddd632db3482c1021e5',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a2aad3b2454d956f20dac1bb0ad75a2f8',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ab1ae3d51f65f7af60147da1c51a7a0c2',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad7659dc0eaa491447ad127ef7098924f',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a7624585480f83a46725c92b5dee20ebc',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aca6118b5bbe6f667f05c53bd52543045',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()']]], + 
['sharedstoretransformerd',['SharedStoreTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9063e7fc044a679652d5a3a31aa77e7c',1,'cutlass::gemm::GemmEpilogue::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a0b8ac1972b2f2cff48070f8b862ed25c',1,'cutlass::gemm::GemmEpilogueTraits::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aa5cea8dbebda9a12a503ae1416c4da33',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a00000e0cd14b9e6e242eafb5133af8cf',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreTransformerD()']]], + ['skew',['Skew',['../structcutlass_1_1TileIteratorBase.html#ae89afbcf642b3023770ff22969c51d16',1,'cutlass::TileIteratorBase::Skew()'],['../structcutlass_1_1TileLoadIterator.html#a11ec4297c9a1352c8005ac222892b35c',1,'cutlass::TileLoadIterator::Skew()'],['../structcutlass_1_1TileStoreIterator.html#a57348779bb004ed1ea0fd9cc252e895d',1,'cutlass::TileStoreIterator::Skew()']]], + ['storage',['Storage',['../structcutlass_1_1PredicateVector.html#afe85a07b9f311327c6bf04e3a5f94e5a',1,'cutlass::PredicateVector::Storage()'],['../classcutlass_1_1TensorRef.html#a604921388cb7ee18ddb8127b8ca2f7fd',1,'cutlass::TensorRef::Storage()'],['../structcutlass_1_1TileIteratorBase.html#a6ca47fd6e2f9cbb3498c138417ea414a',1,'cutlass::TileIteratorBase::Storage()']]], + ['storeiterator',['StoreIterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a15eee5bf6367a36a5b5c8024437f4834',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['strides',['Strides',['../structcutlass_1_1FragmentIterator.html#a2858ba9a8a9bbaef1de73415cff9b3c1',1,'cutlass::FragmentIterator']]] +]; diff --git a/docs/generated-html/search/typedefs_e.html b/docs/generated-html/search/typedefs_e.html new file mode 100644 index 00000000..ccde4cc1 --- /dev/null +++ b/docs/generated-html/search/typedefs_e.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_e.js b/docs/generated-html/search/typedefs_e.js new file mode 100644 index 00000000..529cbd11 --- /dev/null +++ b/docs/generated-html/search/typedefs_e.js @@ -0,0 +1,21 @@ +var searchData= +[ + ['tensorref_5ft',['TensorRef_t',['../classcutlass_1_1TensorView.html#a762fc3d887ab14f4c7bcde85f0af16ab',1,'cutlass::TensorView']]], + ['this_5f',['This_',['../structcutlass_1_1Fragment.html#a32f7ff86b73576a15c5ddaa40c4e0a95',1,'cutlass::Fragment::This_()'],['../structcutlass_1_1FragmentIterator.html#ae320d9672450f5341abcdb24a8b09369',1,'cutlass::FragmentIterator::This_()'],['../structcutlass_1_1FragmentConstIterator.html#add14f695231c2bdd6284bf22b1e66f8f',1,'cutlass::FragmentConstIterator::This_()'],['../structcutlass_1_1gemm_1_1Gemm.html#a26c13e8bbad805760443ef6df475e317',1,'cutlass::gemm::Gemm::This_()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a2892be253a3de5bffc3edcef2890d3a8',1,'cutlass::gemm::GemmGlobalIteratorAb::This_()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6a745d66c4c7de352041f779e54e6b2b',1,'cutlass::gemm::GemmGlobalIteratorCd::This_()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa8b453116c2d96ea2c56e08cb981346c',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::This_()']]], + ['threadblocktile',['ThreadBlockTile',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a5e43f3c9aa8d7dc5f01dfc63b1ea97dc',1,'cutlass::gemm::GemmMultiplicandTraits']]], + 
['threadoffset',['ThreadOffset',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#afd09d3b8e5ca04eab7edc2e5723816e5',1,'cutlass::gemm::GemmGlobalIteratorAb::ThreadOffset()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6d985f8e93be21e56f72ec1400d73df1',1,'cutlass::gemm::GemmGlobalIteratorCd::ThreadOffset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a667cae4a9fa78a6df073f5ee48ef9664',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::ThreadOffset()'],['../structcutlass_1_1TileTraits.html#af9c0fc178dac7f9dac8d254da34e04dd',1,'cutlass::TileTraits::ThreadOffset()'],['../structcutlass_1_1TileIteratorBase.html#a5abf4755aee07dc58b1d6183fbf4786f',1,'cutlass::TileIteratorBase::ThreadOffset()'],['../structcutlass_1_1TileLoadIterator.html#a8a1527b4b469ae1f97afde2502ece70d',1,'cutlass::TileLoadIterator::ThreadOffset()'],['../structcutlass_1_1TileStoreIterator.html#a6a6f51f459f98c0cddeacf476660cd27',1,'cutlass::TileStoreIterator::ThreadOffset()'],['../structcutlass_1_1TileTraitsStrideMajor.html#ae8d14a3c6871072febfd75ed08aba32c',1,'cutlass::TileTraitsStrideMajor::ThreadOffset()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a823ba83e9ca680da0af7d63be772a351',1,'cutlass::TileTraitsContiguousMajor::ThreadOffset()']]], + ['threads',['Threads',['../structcutlass_1_1gemm_1_1ReshapeThreads.html#afd3614ff45f0fc77ad4967951cb5ab57',1,'cutlass::gemm::ReshapeThreads::Threads()'],['../structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html#a894932ad04fae3aea06eb6d259e01c1c',1,'cutlass::gemm::ReshapeThreads< Tile_, Threads_, true 
>::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a29bd05960cc541bb67098f5483c84cf6',1,'cutlass::gemm::GemmGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a9aff3e2ff0db5a5169257e964e5895c6',1,'cutlass::gemm::GemmGlobalTileCdTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a33e4dcd4449f324fed5ceaa2cde01b50',1,'cutlass::gemm::GemmGlobalIteratorAb::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#afdd08b4f4c1feaa426f997d15cd28c02',1,'cutlass::gemm::GemmGlobalIteratorCd::Threads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a1acf2a1d8bf73fda142e7d82e05f00a2',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a9bef06b59f27c6e673066a7f0280aa06',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Threads()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#ae7a4f120805421ac0712604723612b7e',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a5fd1a9f132c7aa0f68e129553f519d1e',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aeb866237318ac7983e554a08395c5125',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Threads()']]], + 
['threadsdelta',['ThreadsDelta',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a65f9ccd630dde0c9db5358cfc951583d',1,'cutlass::gemm::GemmGlobalTileTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#ae2f8331619e735e620f8a8cf2cdde077',1,'cutlass::gemm::GemmGlobalTileCdTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#a6eee97f03dcea1c441116e143cf58018',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a2bb0f0820e52417ff77e7a2bdb9ed434',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::ThreadsDelta()']]], + ['threadshape',['ThreadShape',['../structcutlass_1_1TileTraitsStrideMajor.html#a03567f41ce616ebb4cdb309c85820599',1,'cutlass::TileTraitsStrideMajor::ThreadShape()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a33116b67e580292d4e354ca17ecd4167',1,'cutlass::TileTraitsContiguousMajor::ThreadShape()'],['../structcutlass_1_1TileTraitsWarpRake.html#ad6619e0b5d876fafd51c78e39f2c029e',1,'cutlass::TileTraitsWarpRake::ThreadShape()']]], + 
['threadsperwarp',['ThreadsPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a0761c497c41a45652368fc0d54def98f',1,'cutlass::gemm::GemmSharedLoadTileATraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#aed92656a074e915d97a1b6a990aeba66',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#adf72ea773b8d4d3eb184f59c8cdf9543',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a9022ffc49b32503fd3639341e7e291a3',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#aa784f29ff453c1656fdea8270454fa55',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a5bc98fd196c1f1e4e3f1bfc621df4f50',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ad2fbba0a70da29af27ed4578577abc5e',1,'cutlass::gemm::ThreadMultiplyAdd::ThreadsPerWarp()']]], + ['threadsstrides',['ThreadsStrides',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ae540e7ea7106552682aa4c97b833b3b1',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ThreadsStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a2053e4b9cb3ed2727c89960354ea0b29',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ThreadsStrides()']]], + 
['tile',['Tile',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aebbf8834d0d88f0e5b3e1926db5e6758',1,'cutlass::gemm::GemmGlobalTileTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ab96f324083e51ce4c2b73c18803c69a7',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a74196946c28e98ee60346b0eeede1471',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a9a00be672617162c4c7ac94c7d8980cc',1,'cutlass::gemm::GemmSharedLoadTileATraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ac242508ec46db0493a69a589dbfc19e4',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a2bc41b907417b47f3dca9c3dd358f8bc',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a63f980fea1ff3dd83ac276cfd83a4ce5',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Tile()'],['../structcutlass_1_1ReshapeTile.html#a8d57fe6422aa920d9815a66e5a85b5f5',1,'cutlass::ReshapeTile::Tile()'],['../structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html#a966a9432cf42dfdff8ad6b89ebd74f06',1,'cutlass::ReshapeTile< Tile_, kAccessSize_, true 
>::Tile()'],['../structcutlass_1_1TileTraits.html#ab831be0adb255eece4f2e12fd9713831',1,'cutlass::TileTraits::Tile()'],['../structcutlass_1_1TileIteratorBase.html#a954ef18acc12d8256a7d4e37683f8c2c',1,'cutlass::TileIteratorBase::Tile()'],['../structcutlass_1_1TileLoadIterator.html#a7f1499ada284c21624487d4d3a5dbd10',1,'cutlass::TileLoadIterator::Tile()'],['../structcutlass_1_1TileStoreIterator.html#a8a87c8ef986e110a01a9226012594a61',1,'cutlass::TileStoreIterator::Tile()'],['../structcutlass_1_1TileTraitsStrideMajor.html#afbb78ece048b868475d4a6802e6894ac',1,'cutlass::TileTraitsStrideMajor::Tile()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a1607d53544302c12278793bc9b283763',1,'cutlass::TileTraitsContiguousMajor::Tile()'],['../structcutlass_1_1TileTraitsWarpRake.html#adcd658d9daf286368a9d51c8c1647f89',1,'cutlass::TileTraitsWarpRake::Tile()'],['../structcutlass_1_1TileTraitsStandard.html#aee3fee526bc4d4820c03665a2f5f166b',1,'cutlass::TileTraitsStandard::Tile()']]], + ['tilewithoutskew',['TileWithoutSkew',['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a050cf5964a2d3683491bc4313ead5450',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::TileWithoutSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a5a5a36fc570e1225b20ce0a48c89d213',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithoutSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a1f35981a6d661635dfbcf7c7a76056a2',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithoutSkew()']]], + ['tilewithoutskew_5f',['TileWithoutSkew_',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a93ae99460695718babaef6d1ef597e38',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithoutSkew_()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a3d8be9ddea1cab53d1b4b3d508f9eab8',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithoutSkew_()']]], + 
['tilewithskew',['TileWithSkew',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a72e0214f86cf8b3711d006dcd69d7a17',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a69c7ec2a779718556e6d9119588e791c',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithSkew()']]], + ['traits',['Traits',['../structcutlass_1_1gemm_1_1Gemm.html#a29f52e33e1f1cf150f5062d9ad2590ff',1,'cutlass::gemm::Gemm::Traits()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a645ab6e9e63163ee6bf536717a30fb1b',1,'cutlass::gemm::GemmEpilogue::Traits()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af2b5682b8e6dd13590ec258a44636430',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Traits()'],['../structcutlass_1_1TileIteratorBase.html#ae7add0ee02bbec2c130ebaf608ab0696',1,'cutlass::TileIteratorBase::Traits()'],['../structcutlass_1_1TileLoadIterator.html#a7c6182031d9aa41d0e4a64516723e20a',1,'cutlass::TileLoadIterator::Traits()'],['../structcutlass_1_1TileStoreIterator.html#a6f50a8aec2d7045e9057b93df08172a8',1,'cutlass::TileStoreIterator::Traits()']]], + ['transformedfragment',['TransformedFragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afe7503a3304eefd633581d6bc73a0108',1,'cutlass::gemm::GlobalLoadStreamBase::TransformedFragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#aa2227d7fa1edef3f6730c7db41b132b4',1,'cutlass::gemm::SharedLoadStream::TransformedFragment()']]], + ['transformer',['Transformer',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#aa24bd9f94bea04a148b49b2a97b63fbe',1,'cutlass::gemm::GlobalLoadStreamBase::Transformer()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#ad1f70f0dd1027da1353ff7a38f524904',1,'cutlass::gemm::SharedLoadStream::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a882c10bed18f62ece97f5f20f9de3296',1,'cutlass::gemm::HgemmTransformerA< 
MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a42c5bafcb226623b3326dbd01fc72f3b',1,'cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#aaaccb3f02a857e0c80d2891c6c6dcdb7',1,'cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#ae66bb2c1f87e19278ff471c32e71ea85',1,'cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html#a98aefa95117dbfdf2e577890318a6c13',1,'cutlass::gemm::IgemmGlobalStoreTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html#a52ecdfd8b94d8d7f4881048e11a33aba',1,'cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html#ad3190650741cef20c1aca919eddd9d72',1,'cutlass::gemm::IgemmGlobalLoadTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html#a49c249026be24ec8a66f5eda99cb855c',1,'cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html#a9edd08d595327a8cc3b8da50622b3bd2',1,'cutlass::gemm::IgemmSharedStoreTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a0b53e18f109ac0fd116e0d01ed6ec197',1,'cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ 
>::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a8a4e3ce1174789e2b695bda7b863079f',1,'cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a92320b7224a77a8af61e55beef30ad49',1,'cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a9728f71c2e7a6a649bd28d8c11241b0a',1,'cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer()']]], + ['true_5ftype',['true_type',['../namespacecutlass_1_1platform.html#a0eddc4a3921e137f31fd8014be96e807',1,'cutlass::platform']]], + ['type',['Type',['../structcutlass_1_1StorageType.html#a2b9c99ae52eb4962428f776efc1e7f06',1,'cutlass::StorageType::Type()'],['../structcutlass_1_1StorageType_3_014_01_4.html#aa6754c0eb530544a1457afe1ae94a807',1,'cutlass::StorageType< 4 >::Type()'],['../structcutlass_1_1StorageType_3_012_01_4.html#a66c52fe770774ea01c511aea1af1f8d4',1,'cutlass::StorageType< 2 >::Type()'],['../structcutlass_1_1StorageType_3_011_01_4.html#a4a70002785c378c1f180800f2a65bcd4',1,'cutlass::StorageType< 1 >::Type()'],['../structcutlass_1_1Vectorize.html#a070ec95f4297d769ee53a4d8a650c05e',1,'cutlass::Vectorize::Type()'],['../structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html#a79f147933e3f520145aee94ae18da3c5',1,'cutlass::Vectorize< Element_, 1 
>::Type()'],['../structcutlass_1_1platform_1_1integral__constant.html#af58810ccead8f16ed88cd6a4afdc6e52',1,'cutlass::platform::integral_constant::type()'],['../structcutlass_1_1platform_1_1enable__if.html#aff9c0f270020cf097addf77e53a5af99',1,'cutlass::platform::enable_if::type()'],['../structcutlass_1_1platform_1_1conditional.html#ab6484d0dd6449b5195c4e868026fed11',1,'cutlass::platform::conditional::type()'],['../structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html#a8d55f500f667de560650554e9c220644',1,'cutlass::platform::conditional< false, T, F >::type()'],['../structcutlass_1_1platform_1_1remove__const.html#ac3662947fa50251daf58240a9c798085',1,'cutlass::platform::remove_const::type()'],['../structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html#af68706cfaa6af14edc26ad5b974b47e3',1,'cutlass::platform::remove_const< const T >::type()'],['../structcutlass_1_1platform_1_1remove__volatile.html#a4f5b043d46206248d1bbbcf650707dd1',1,'cutlass::platform::remove_volatile::type()'],['../structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html#aca9bb93efe43106321e4afe0b67542a3',1,'cutlass::platform::remove_volatile< volatile T >::type()'],['../structcutlass_1_1platform_1_1remove__cv.html#a19e5b12cf4eb15ce13d6306735b6de08',1,'cutlass::platform::remove_cv::type()'],['../structcutlass_1_1platform_1_1aligned__storage.html#a9cf0360f335bcd1e9d9e1b266b6dd6c1',1,'cutlass::platform::aligned_storage::type()']]] +]; diff --git a/docs/generated-html/search/typedefs_f.html b/docs/generated-html/search/typedefs_f.html new file mode 100644 index 00000000..09099791 --- /dev/null +++ b/docs/generated-html/search/typedefs_f.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_f.js b/docs/generated-html/search/typedefs_f.js new file mode 100644 index 00000000..1d8d4851 --- /dev/null +++ b/docs/generated-html/search/typedefs_f.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['value_5ftype',['value_type',['../structcutlass_1_1platform_1_1integral__constant.html#ab2ed0b3506818139f1f96639742e79fd',1,'cutlass::platform::integral_constant']]], + ['vector',['Vector',['../structcutlass_1_1VectorTraits.html#a4ac6196c07e0d3ba8a03cd72a05026a2',1,'cutlass::VectorTraits::Vector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#a12b9084c48d2d829730f907485dfb5e5',1,'cutlass::VectorTraits< Vector< T, Lanes > >::Vector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#aff21f15596731eacf8c587811bb4ccdb',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::Vector()']]] +]; diff --git a/docs/generated-html/search/variables_0.html b/docs/generated-html/search/variables_0.html new file mode 100644 index 00000000..51f7bd6b --- /dev/null +++ b/docs/generated-html/search/variables_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_0.js b/docs/generated-html/search/variables_0.js new file mode 100644 index 00000000..6dbf197f --- /dev/null +++ b/docs/generated-html/search/variables_0.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['aligned_5f',['aligned_',['../unioncutlass_1_1Vector.html#a9e9352594fcd022526d5b69b6c25c99c',1,'cutlass::Vector::aligned_()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a9e41dbe541a7dddf1e461e0390fe8896',1,'cutlass::Vector< half, kLanes_ >::aligned_()']]], + ['alpha',['alpha',['../structcutlass_1_1gemm_1_1GemmDesc.html#a053c2b529be527f510ee317737fbf7e8',1,'cutlass::gemm::GemmDesc::alpha()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a3248d6b3d9bcc59365d582b879292a70',1,'cutlass::gemm::LinearScaling::Params::alpha()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#ab9c51c8b1f06e935a353ac5b1c22cee6',1,'cutlass::gemm::LinearScaling::alpha()']]] +]; diff --git a/docs/generated-html/search/variables_1.html b/docs/generated-html/search/variables_1.html new file mode 100644 index 00000000..f46154d8 --- /dev/null +++ b/docs/generated-html/search/variables_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_1.js b/docs/generated-html/search/variables_1.js new file mode 100644 index 00000000..15bf17b9 --- /dev/null +++ b/docs/generated-html/search/variables_1.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['beta',['beta',['../structcutlass_1_1gemm_1_1GemmDesc.html#ab91b702a9932144b388fad3159130332',1,'cutlass::gemm::GemmDesc::beta()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a0e455ad2e4eba67259867f9123ca817b',1,'cutlass::gemm::LinearScaling::Params::beta()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#a8af4e58c4988838f2dd0a2172c47e12e',1,'cutlass::gemm::LinearScaling::beta()']]], + ['byte',['byte',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html#a86f075f91b80918e968951713430f0b4',1,'cutlass::platform::alignment_of::pad']]] +]; diff --git a/docs/generated-html/search/variables_10.html b/docs/generated-html/search/variables_10.html new file mode 100644 index 00000000..b62b717e --- /dev/null +++ b/docs/generated-html/search/variables_10.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_10.js b/docs/generated-html/search/variables_10.js new file mode 100644 index 00000000..c5770385 --- /dev/null +++ b/docs/generated-html/search/variables_10.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['val',['val',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html#abc729cc51d5c90b1d7b0df3092d47cd4',1,'cutlass::platform::alignment_of::pad']]], + ['value',['value',['../structcutlass_1_1platform_1_1integral__constant.html#a9bbaca83ae76941edb9b75b2741d3ad9',1,'cutlass::platform::integral_constant::value()'],['../structcutlass_1_1platform_1_1is__base__of__helper.html#ac7e3ab73057682cc2eb6ed74c33e5eff',1,'cutlass::platform::is_base_of_helper::value()']]] +]; diff --git a/docs/generated-html/search/variables_2.html b/docs/generated-html/search/variables_2.html new file mode 100644 index 00000000..15275b7a --- /dev/null +++ b/docs/generated-html/search/variables_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_2.js b/docs/generated-html/search/variables_2.js new file mode 100644 index 00000000..1f009500 --- /dev/null +++ b/docs/generated-html/search/variables_2.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['clear',['clear',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a5513254af1f9979b6d0b9f236c3e7325',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage']]], + ['congruous',['Congruous',['../structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html#abe4eb7f9a0ed7d48a81029e88849dcf2',1,'cutlass::gemm::GemmOperandTraitsAb']]] +]; diff --git a/docs/generated-html/search/variables_3.html b/docs/generated-html/search/variables_3.html new file mode 100644 index 00000000..fbc36712 --- /dev/null +++ b/docs/generated-html/search/variables_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_3.js b/docs/generated-html/search/variables_3.js new file mode 100644 index 00000000..91abc96b --- /dev/null +++ b/docs/generated-html/search/variables_3.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['d_5fa',['d_a',['../structcutlass_1_1gemm_1_1GemmDesc.html#aae63781de41962f496da469684919447',1,'cutlass::gemm::GemmDesc']]], + ['d_5fb',['d_b',['../structcutlass_1_1gemm_1_1GemmDesc.html#a05915032eba39bc9b085bec5ff17257b',1,'cutlass::gemm::GemmDesc']]], + ['d_5fc',['d_c',['../structcutlass_1_1gemm_1_1GemmDesc.html#aa2b3126c082d04fd31521cb0e84cf4d5',1,'cutlass::gemm::GemmDesc']]], + ['d_5fd',['d_d',['../structcutlass_1_1gemm_1_1GemmDesc.html#a30326e2d81c8e154d749f35837903216',1,'cutlass::gemm::GemmDesc']]] +]; diff --git a/docs/generated-html/search/variables_4.html b/docs/generated-html/search/variables_4.html new file mode 100644 index 00000000..8067e67f --- /dev/null +++ b/docs/generated-html/search/variables_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_4.js b/docs/generated-html/search/variables_4.js new file mode 100644 index 00000000..a631b3e7 --- /dev/null +++ b/docs/generated-html/search/variables_4.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['epilogue',['epilogue',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a073430a1e8b124aec8a1f1e00f262bc8',1,'cutlass::gemm::GemmTraits::Params::epilogue()'],['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html#afdca9ac1d28e17efaa394f5831a60c04',1,'cutlass::gemm::GemmTraits::SharedStorage::epilogue()']]] +]; diff --git a/docs/generated-html/search/variables_5.html b/docs/generated-html/search/variables_5.html new file mode 100644 index 00000000..7e95e946 --- /dev/null +++ b/docs/generated-html/search/variables_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_5.js b/docs/generated-html/search/variables_5.js new file mode 100644 index 00000000..b10e64d9 --- /dev/null +++ b/docs/generated-html/search/variables_5.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['fetched_5fa',['fetched_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a3147da380e4c1e465aba0b965ac87ab5',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fetched_5fb',['fetched_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a837fbec1d47ae45480941de6290889c0',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fetched_5ffragment',['fetched_fragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a26aa580a2697ad02c27f868e7779348d',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['functor',['functor',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#afa888d993b86ed88950a9e5ab7edeb06',1,'cutlass::gemm::GemmEpilogueTraits::Params']]] +]; diff --git a/docs/generated-html/search/variables_6.html b/docs/generated-html/search/variables_6.html new file mode 100644 index 00000000..3d398e62 --- /dev/null +++ b/docs/generated-html/search/variables_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_6.js b/docs/generated-html/search/variables_6.js new file mode 100644 index 00000000..859d50bf --- /dev/null +++ b/docs/generated-html/search/variables_6.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['global',['global',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html#a3c2980547310ec4307f3a5f9817dfc51',1,'cutlass::gemm::GemmTraits::StreamSharedStorage']]], + ['global_5fstream_5fa',['global_stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a575bcff901d69ae3f46987222f23ab64',1,'cutlass::gemm::GemmTraits::Params']]], + ['global_5fstream_5fb',['global_stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a46affe35cb16874de5a2b9777aedf596',1,'cutlass::gemm::GemmTraits::Params']]] +]; diff --git a/docs/generated-html/search/variables_7.html b/docs/generated-html/search/variables_7.html new file mode 100644 index 00000000..7b791460 --- /dev/null +++ b/docs/generated-html/search/variables_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_7.js b/docs/generated-html/search/variables_7.js new file mode 100644 index 00000000..50430912 --- /dev/null +++ b/docs/generated-html/search/variables_7.js @@ -0,0 +1,12 @@ +var searchData= +[ + ['idx',['idx',['../structcutlass_1_1Coord.html#a50de265129f1db7bdf2f0aefbc6a46bc',1,'cutlass::Coord']]], + ['inc_5fadvance',['inc_advance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a8c2618ac16362a8362dcddeed71c41d4',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a857db0c999250248b104f17f13fe9bd8',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::inc_advance()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a1187258cd4068a627e73bee0302f1fc2',1,'cutlass::TileIteratorBase::Params::inc_advance()']]], + ['inc_5fd',['inc_d',['../structcutlass_1_1TileIteratorBase_1_1Params.html#af95fa1b5102176a0fa9b17713fd48150',1,'cutlass::TileIteratorBase::Params']]], + ['inc_5fh',['inc_h',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#aed94505e5a269d5f33499e71284104f5',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a6306f771718c0c05276e103f30f862b2',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::inc_h()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#aea591d4278a8338ae8b50fa0b8f3a366',1,'cutlass::TileIteratorBase::Params::inc_h()']]], + ['inc_5fw',['inc_w',['../structcutlass_1_1TileIteratorBase_1_1Params.html#ac6e81450a2d78555a6c2415dcc42b178',1,'cutlass::TileIteratorBase::Params']]], + ['isvector',['IsVector',['../structcutlass_1_1VectorTraits.html#abf96ea5dfd3212d388cb91e48cc0e6a2',1,'cutlass::VectorTraits::IsVector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aead181209c756f25ab5870682670bb99',1,'cutlass::VectorTraits< Vector< T, Lanes > 
>::IsVector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a893488718d8437970c1b4ed4f4056620',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::IsVector()']]], + ['iterator',['iterator',['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html#ae59f871c06a0ac7b9224f0de923082d7',1,'cutlass::gemm::SharedLoadStream::Params::iterator()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a54481a42d4125e3693a086269d9a7b10',1,'cutlass::gemm::SharedLoadStream::iterator()']]], + ['iterator_5fc',['iterator_c',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a7350ceefcd09a9e3662ca30b780cc2ce',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['iterator_5fd',['iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a987c179a7e73c2572fe8aef3255668f7',1,'cutlass::gemm::GemmEpilogueTraits::Params']]] +]; diff --git a/docs/generated-html/search/variables_8.html b/docs/generated-html/search/variables_8.html new file mode 100644 index 00000000..8ebc5f6b --- /dev/null +++ b/docs/generated-html/search/variables_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_8.js b/docs/generated-html/search/variables_8.js new file mode 100644 index 00000000..7406b119 --- /dev/null +++ b/docs/generated-html/search/variables_8.js @@ -0,0 +1,64 @@ +var searchData= +[ + ['k',['k',['../structcutlass_1_1gemm_1_1GemmDesc.html#ac789a7e5d2db65d006f1e8e3df542a6f',1,'cutlass::gemm::GemmDesc::k()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aae3a008b39f9678a03192f6ff54152d8',1,'cutlass::gemm::GemmTraits::Params::k()']]], + ['kaccesssize',['kAccessSize',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aa001e09b246fdd8259cbda6a500cad5f',1,'cutlass::gemm::GemmGlobalTileTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ae852c89da0455025c0c41af258e47047',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a846e6d8d06be0ba6fa41b1431c8ec061',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a0a33d4289ed45e988d560b5f73ac997e',1,'cutlass::gemm::GemmSharedLoadTileATraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#aa41cc5dc82fe08457d103545f8f63081',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a9521c4017e227b2511891a7fb18513e1',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8d308d593b59624abe3e228d588be61d',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kAccessSize()'],['../structcutlass_1_1TileIteratorBase.html#aef07ba456ea016092d7d2446751b76a3',1,'cutlass::TileIteratorBase::kAccessSize()']]], + ['kaccumulatorsperldsa',['kAccumulatorsPerLdsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#abbdd356f280099269867e614684645cf',1,'cutlass::gemm::GemmConfig']]], + 
['kaccumulatorsperldsb',['kAccumulatorsPerLdsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a9dd092bca2f1f2c039f367b23bafa9c1',1,'cutlass::gemm::GemmConfig']]], + ['kadvance',['kAdvance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a8c1e871f17685b16a7a41fcc888f0125',1,'cutlass::gemm::GemmGlobalIteratorAb::kAdvance()'],['../structcutlass_1_1TileIteratorBase.html#ac1a64e974dcd69c3a86a31db6cbff421',1,'cutlass::TileIteratorBase::kAdvance()'],['../structcutlass_1_1TileLoadIterator.html#a69d2f21c8188fb3229af8c2dbe0a23b6',1,'cutlass::TileLoadIterator::kAdvance()'],['../structcutlass_1_1TileStoreIterator.html#a8059c57030df99b73309e9210ec5f624',1,'cutlass::TileStoreIterator::kAdvance()']]], + ['kbytes',['kBytes',['../structcutlass_1_1PredicateVector.html#ab870e074b33c598f69fe11e104615c5a',1,'cutlass::PredicateVector']]], + ['kc',['kC',['../structcutlass_1_1Shape.html#a3f2433fd6401dd28f1130499f9fd340c',1,'cutlass::Shape']]], + ['kcount',['kCount',['../structcutlass_1_1ShapeCount.html#a8d25b48b3294b5563f89c62a6e6d00e5',1,'cutlass::ShapeCount']]], + ['kd',['kD',['../structcutlass_1_1Shape.html#a19086a5567d6c710ec853e35a7f29c25',1,'cutlass::Shape']]], + ['kdhw',['kDhw',['../structcutlass_1_1ShapeCount.html#af7d7ccd42de2c49fe57f03cf0e657fe8',1,'cutlass::ShapeCount']]], + ['kdhwc',['kDhwc',['../structcutlass_1_1ShapeCount.html#a5a274564d6b8607a0be621b2664fba18',1,'cutlass::ShapeCount']]], + ['kelements',['kElements',['../structcutlass_1_1Fragment.html#a2b9a64391d00ef23dd8d456c2337fa60',1,'cutlass::Fragment']]], + ['kelementsperaccess',['kElementsPerAccess',['../structcutlass_1_1FragmentIterator.html#ad2c43e30e78e8799df7cb02ac08cee9a',1,'cutlass::FragmentIterator::kElementsPerAccess()'],['../structcutlass_1_1FragmentConstIterator.html#a004fabc9caa6924f3fb4badcbb19e88f',1,'cutlass::FragmentConstIterator::kElementsPerAccess()']]], + 
['kextent',['kExtent',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html#a881f84951bc9e47ab2be9ef3f2c1e423',1,'cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >::kExtent()'],['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html#a82ff9b447e4a58164b5f7d53d2602930',1,'cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >::kExtent()']]], + ['kfragmentsize',['kFragmentSize',['../structcutlass_1_1TileIteratorBase.html#a4e0b2bc06bb8f52313e4d8c51ab30ff2',1,'cutlass::TileIteratorBase']]], + ['kh',['kH',['../structcutlass_1_1Shape.html#a3a20d9062bba613c160bb2cd14f80a5e',1,'cutlass::Shape']]], + ['khw',['kHw',['../structcutlass_1_1ShapeCount.html#afc957be69eb78e4849ba8ab3cc66583f',1,'cutlass::ShapeCount']]], + ['khwc',['kHwc',['../structcutlass_1_1ShapeCount.html#a75324e2c9d31a0787343fc994586b742',1,'cutlass::ShapeCount']]], + ['kint8output',['kInt8Output',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html#a8609af98d1e43cd25688bae6f33feed4',1,'cutlass::gemm::IgemmEpilogueTraits']]], + ['kiterationsd',['kIterationsD',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8663311646210b690bb0c2a1012e82f0',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + ['kiterationsh',['kIterationsH',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a3b1a461c1dfbcd3817ab2d57bd0da9f1',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + ['kiterationsinhperwarp',['kIterationsInHPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a4b8d66df02ba1653aa6d1f23b967f237',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + 
['kiteratorfragment',['kIteratorFragment',['../structcutlass_1_1TileIteratorBase.html#a38c8ec1e9d0117172981b4c7dd4bf3be',1,'cutlass::TileIteratorBase::kIteratorFragment()'],['../structcutlass_1_1TileLoadIterator.html#aba1d75a0cd5f11dee2aecf89b2b13d98',1,'cutlass::TileLoadIterator::kIteratorFragment()'],['../structcutlass_1_1TileStoreIterator.html#a94c0567316118abfb84fc28560a5a46a',1,'cutlass::TileStoreIterator::kIteratorFragment()']]], + ['kkstrided',['kKstrided',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a1984c9ef6abfd029acbc3f702593ab85',1,'cutlass::gemm::GemmMultiplicandTraits']]], + ['klanes',['kLanes',['../structcutlass_1_1VectorTraits.html#a052e1e5963a9e04482b16cb881d1eaf8',1,'cutlass::VectorTraits::kLanes()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aca745b59c6c21292f119943e5a480f39',1,'cutlass::VectorTraits< Vector< T, Lanes > >::kLanes()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a43ac200035052a2c352c8c4b84aac73c',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::kLanes()']]], + 
['klayout',['kLayout',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a807cffc6f69f8d30a2fc94cf49fb904c',1,'cutlass::gemm::GlobalLoadStreamBase::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a74bc07cb021a73513ab2fbacd572be90',1,'cutlass::gemm::GemmGlobalTileTraits::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#afe016e0c6234075a8d69ba7341555ece',1,'cutlass::gemm::GemmGlobalIteratorAb::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a27b88818f5b094372bf2c6e090c9148a',1,'cutlass::gemm::GemmGlobalIteratorCd::kLayout()'],['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a19076e58e60d296da74cf504e2a473fd',1,'cutlass::gemm::GemmMultiplicandTraits::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ad2010686bceb21aec9a1924ae379edc1',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aedd49525e2c849baecf88cdfd9e3515c',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#afbd350793888a7e7b299548dca854c13',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a31fa28168811e2d04fbd74029df785ab',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ae0f176733ba9dee0cce45435ac5d53ba',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::kLayout()']]], + ['klayouta',['kLayoutA',['../structcutlass_1_1gemm_1_1GemmTraits.html#ac5bb5931a707ed7672f69267753ba41b',1,'cutlass::gemm::GemmTraits']]], 
+ ['klayoutb',['kLayoutB',['../structcutlass_1_1gemm_1_1GemmTraits.html#a078e8d9cfa1b182e1b96a2cc8c54b684',1,'cutlass::gemm::GemmTraits']]], + ['kmemoryspace',['kMemorySpace',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#af219ece6e66e2866169e06e15cc4472d',1,'cutlass::gemm::GemmGlobalTileTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a59c981aa720f983b846bed7c3e4a7cab',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#ae5a07814b9cfe9a64f69bac0f0772f20',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a4456e4c8048bfb378e5b80833a0d19e5',1,'cutlass::gemm::GemmSharedLoadTileATraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a7007093a4abf79a0b4bfb3fc85a02620',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a8914bc5154f21fa5fd182b0009c44c39',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#afb4687520eff9c6a21c35a5e04f69de8',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kMemorySpace()'],['../structcutlass_1_1TileIteratorBase.html#a871c9b82109eab432c5a1d465643bf97',1,'cutlass::TileIteratorBase::kMemorySpace()'],['../structcutlass_1_1TileLoadIterator.html#ac21bd78b31c99c826f0eddb5aa033bf1',1,'cutlass::TileLoadIterator::kMemorySpace()'],['../structcutlass_1_1TileStoreIterator.html#adaebec9eacf767f63f048033de73ea5b',1,'cutlass::TileStoreIterator::kMemorySpace()']]], + 
['koperand',['kOperand',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#ae0bca976b7cfba8561db4cccc16e99e1',1,'cutlass::gemm::GemmGlobalTileTraits::kOperand()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#af511bba9fc2125516eb1442b1c88d851',1,'cutlass::gemm::GemmSharedLoadTileATraits::kOperand()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#afd4881aae69c8041d3931982d85f44e4',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kOperand()']]], + ['kpredicates',['kPredicates',['../structcutlass_1_1PredicateVector.html#afff3a2142d9853606d6ad7c3a459f492',1,'cutlass::PredicateVector']]], + ['kpredicatesperbyte',['kPredicatesPerByte',['../structcutlass_1_1PredicateVector.html#a1387c4a964f971ed4611d750a09ec0b5',1,'cutlass::PredicateVector']]], + ['kpredicatestart',['kPredicateStart',['../structcutlass_1_1PredicateVector.html#acf848dce84c01453ab8a2d00c8d4f86e',1,'cutlass::PredicateVector']]], + ['kscalarsin4b',['kScalarsIn4B',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ad77b9084720ad7378e033e54bfb74ce7',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsIn4B()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a774a052f0f98f50e46dda933c81badd5',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsIn4B()']]], + ['kscalarsperldga',['kScalarsPerLdgA',['../structcutlass_1_1gemm_1_1GemmConfig.html#a2e0a043c5d4d7959ec1a2214c3ac39ac',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldgb',['kScalarsPerLdgB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a849b21fed39aaac1cdd546334739be97',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldgc',['kScalarsPerLdgC',['../structcutlass_1_1gemm_1_1GemmConfig.html#aad47c635a73e83bd4b19494864832d31',1,'cutlass::gemm::GemmConfig']]], + 
['kscalarsperldsa',['kScalarsPerLdsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#aa1b75484138923a52b32888fef608d9b',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldsb',['kScalarsPerLdsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a86470d3a44e2b50ee31ec3c9f79927ef',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldsd',['kScalarsPerLdsD',['../structcutlass_1_1gemm_1_1GemmConfig.html#adaf2ee5b8e6f7bdb9939cd45a186ca56',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperrow',['kScalarsPerRow',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#af1c981ec89a9cabaf5d34231d51a029c',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kScalarsPerRow()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#aa3e378cabce9ed7f199c179c15a12ca4',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kScalarsPerRow()']]], + ['kscalarsperstgd',['kScalarsPerStgD',['../structcutlass_1_1gemm_1_1GemmConfig.html#a3633083f4f778215543e376c092745d7',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperstsa',['kScalarsPerStsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#accc95abc55880abdab92253367b4b186',1,'cutlass::gemm::GemmConfig::kScalarsPerStsA()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ae396f7301f934c179e054f68f0420edf',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsPerStsA()']]], + ['kscalarsperstsb',['kScalarsPerStsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#ac0c8c027e3ede14b62d7c7d519551f21',1,'cutlass::gemm::GemmConfig::kScalarsPerStsB()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a47d99d98c783cf1d317698bd465ffa9a',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsPerStsB()']]], + ['kscalarsperstsd',['kScalarsPerStsD',['../structcutlass_1_1gemm_1_1GemmConfig.html#a3087cdd38e2c65ad0dffdd0587d2cce0',1,'cutlass::gemm::GemmConfig']]], + 
['kscalarsperthread',['kScalarsPerThread',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ae0b53d76096f9d34df6e16280565c7b1',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kScalarsPerThread()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#abb5fdb164b09c8f74f92278f3d68b95f',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kScalarsPerThread()']]], + ['kskew',['kSkew',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ace14ca9ad11e2cdafcd4a4b63c0df591',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#aba6decf87d770becaadd610d9fc27491',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#aaffe67e519e919bf561142e05da6e6c8',1,'cutlass::gemm::GemmSharedLoadTileATraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ac9cd90ecd02809060a2fe6e2da4210f9',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a48baee6541e6359753f1bae5bd864029',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a7e9ce187e12575f0ecd39b2bfe13dddf',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kSkew()']]], + ['kstages',['kStages',['../structcutlass_1_1gemm_1_1GemmConfig.html#a221949c289057e39d439ce03a5b01c52',1,'cutlass::gemm::GemmConfig']]], + ['kstrideh',['kStrideH',['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a87918f4d67a9c1e19dcd3c6bfc243e97',1,'cutlass::gemm::GemmGlobalTileCdTraits']]], + 
['kthreads',['kThreads',['../structcutlass_1_1gemm_1_1Gemm.html#a41239809be4ebc730dd8ff28c9efc58b',1,'cutlass::gemm::Gemm::kThreads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a05039ba8b7d9890903064b1a834dcd3e',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kThreads()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8325bc9d56155ecb6f2ddbd56f4ed23d',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kThreads()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a0b2be601de08848afc4418adb97255bf',1,'cutlass::gemm::GemmConfig::kThreads()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a2b6ad449269a178018f02b8cc64ddb85',1,'cutlass::TileTraitsStrideMajor::kThreads()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a53d10552356855bf7379632e72bbe0c9',1,'cutlass::TileTraitsContiguousMajor::kThreads()'],['../structcutlass_1_1TileTraitsWarpRake.html#a11d943e15e397cbc5233b09071dff642',1,'cutlass::TileTraitsWarpRake::kThreads()'],['../structcutlass_1_1TileTraitsStandard.html#a9cbcbe09aa6e9465b63dd22d59435af1',1,'cutlass::TileTraitsStandard::kThreads()']]], + ['kthreadsperwarp',['kThreadsPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a4246185b8279f245ef5d0650c1eec14f',1,'cutlass::gemm::GemmSharedLoadTileATraits::kThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a049b0bcdf8c5318ee84edeb1e42eaf78',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kThreadsPerWarp()']]], + ['kusage',['kUsage',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a962ffde3b3db78792b67dd1f57ab0a05',1,'cutlass::gemm::GemmMultiplicandTraits']]], + ['kvalue',['kValue',['../structcutlass_1_1Extent.html#a2cb62986b9a7c168bf79b083f33c4bad',1,'cutlass::Extent::kValue()'],['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#a10f7184a9a50de0268efa45dab5dc304',1,'cutlass::Extent< Vector< T, Lanes > 
>::kValue()'],['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a87917a6dfbb1662416c4ea4831669aaf',1,'cutlass::Extent< Vector< T, Lanes > const >::kValue()']]], + ['kw',['kW',['../structcutlass_1_1Shape.html#a78836a20250ff24c25a6622ad818b421',1,'cutlass::Shape']]], + ['kwarpcount',['kWarpCount',['../structcutlass_1_1TileTraitsWarpRake.html#a7a03abe44862077351b0a0a2818d214d',1,'cutlass::TileTraitsWarpRake::kWarpCount()'],['../structcutlass_1_1TileTraitsStandard.html#a1e8f90991e179d13971b84494c989d25',1,'cutlass::TileTraitsStandard::kWarpCount()']]], + ['kwarps',['kWarps',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#af78a275086a297bd93aed920f57a17be',1,'cutlass::gemm::GemmSharedLoadTileATraits::kWarps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a8b8d6a26a29d5477f526d9ce8c27e3e2',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kWarps()']]], + ['kwarpscontiguous',['kWarpsContiguous',['../structcutlass_1_1TileTraitsWarpRake.html#aede0832e95df911b1e6e3f1cc9e593ce',1,'cutlass::TileTraitsWarpRake']]], + ['kwarpsize',['kWarpSize',['../structcutlass_1_1gemm_1_1GemmConfig.html#a677d6a1711cc756b817095b7437cce0e',1,'cutlass::gemm::GemmConfig::kWarpSize()'],['../structcutlass_1_1TileTraitsWarpRake.html#ad25fb7c1b5dc8c5828a69e5a468f490b',1,'cutlass::TileTraitsWarpRake::kWarpSize()'],['../structcutlass_1_1TileTraitsStandard.html#ae9f40eb177c440f01adcc2fe9ca7ec10',1,'cutlass::TileTraitsStandard::kWarpSize()']]], + ['kwarpsstrided',['kWarpsStrided',['../structcutlass_1_1TileTraitsWarpRake.html#a8b1d3fe590f426ce11d597bb98c51bd4',1,'cutlass::TileTraitsWarpRake']]], + ['kwc',['kWc',['../structcutlass_1_1ShapeCount.html#aac5c49469aa80d119c2006291b431276',1,'cutlass::ShapeCount']]], + ['kwordcount',['kWordCount',['../structcutlass_1_1PredicateVector.html#a734bbfaf3829f73ef0b44fa7db4ccd42',1,'cutlass::PredicateVector']]] +]; diff --git a/docs/generated-html/search/variables_9.html b/docs/generated-html/search/variables_9.html 
new file mode 100644 index 00000000..12136613 --- /dev/null +++ b/docs/generated-html/search/variables_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_9.js b/docs/generated-html/search/variables_9.js new file mode 100644 index 00000000..d7ebf99a --- /dev/null +++ b/docs/generated-html/search/variables_9.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['lda',['lda',['../structcutlass_1_1gemm_1_1GemmDesc.html#a62ad30ba419ccb661e6700da98221789',1,'cutlass::gemm::GemmDesc']]], + ['ldb',['ldb',['../structcutlass_1_1gemm_1_1GemmDesc.html#a7591ce0223b0d05c4d6fca6c67b98bfe',1,'cutlass::gemm::GemmDesc']]], + ['ldc',['ldc',['../structcutlass_1_1gemm_1_1GemmDesc.html#a0f492560cabc45cd492da65b819d09db',1,'cutlass::gemm::GemmDesc']]], + ['ldd',['ldd',['../structcutlass_1_1gemm_1_1GemmDesc.html#a3280e5c5484f5c10d1412bcb70eb77e9',1,'cutlass::gemm::GemmDesc']]], + ['load',['load',['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html#aea5ed35a44624684ffa9ada9d09a8893',1,'cutlass::gemm::GemmEpilogueTraits::StreamSharedStorage']]], + ['load_5fiterator',['load_iterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a42ffcba6af2b5ddfb1f4825a34d43532',1,'cutlass::gemm::GlobalLoadStreamBase::Params::load_iterator()'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html#a3be938f8661f9cd10966866b7b80b471',1,'cutlass::gemm::GlobalLoadStreamBase::SharedStorage::load_iterator()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#ad2381f2311ee8400a2dc57c19084ef5e',1,'cutlass::gemm::GlobalLoadStreamBase::load_iterator()']]] +]; diff --git a/docs/generated-html/search/variables_a.html b/docs/generated-html/search/variables_a.html new file mode 100644 index 00000000..24819a37 --- /dev/null +++ b/docs/generated-html/search/variables_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_a.js b/docs/generated-html/search/variables_a.js new file mode 100644 index 00000000..131fefbd --- /dev/null +++ b/docs/generated-html/search/variables_a.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['m',['m',['../structcutlass_1_1gemm_1_1GemmDesc.html#a5c2b3e75cb6873762ba3f85487b78579',1,'cutlass::gemm::GemmDesc::m()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac344bf5ca318dc343bd6fa6bf52d2e22',1,'cutlass::gemm::GemmEpilogue::m()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aaf27c0f2f4ab730ed5c865e9f7d2373b',1,'cutlass::gemm::GemmTraits::Params::m()']]], + ['main_5floop',['main_loop',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html#aa5dd7edc3cffa785eb1e5b62c18c74c4',1,'cutlass::gemm::GemmTraits::SharedStorage']]] +]; diff --git a/docs/generated-html/search/variables_b.html b/docs/generated-html/search/variables_b.html new file mode 100644 index 00000000..b306931e --- /dev/null +++ b/docs/generated-html/search/variables_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_b.js b/docs/generated-html/search/variables_b.js new file mode 100644 index 00000000..97f3b3b1 --- /dev/null +++ b/docs/generated-html/search/variables_b.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['n',['N',['../structcutlass_1_1Coord.html#a3f2f5a9d7ef2063456c4d9f7e57e71ca',1,'cutlass::Coord::N()'],['../structcutlass_1_1gemm_1_1GemmDesc.html#acee9727aa6cb612a25cd6ced4829061a',1,'cutlass::gemm::GemmDesc::n()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9cc371cd2f1a9485583afdacbb7403ea',1,'cutlass::gemm::GemmEpilogue::n()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a437d4b6f1f149849c5ae635a5993e7ac',1,'cutlass::gemm::GemmTraits::Params::n()']]] +]; diff --git a/docs/generated-html/search/variables_c.html b/docs/generated-html/search/variables_c.html new file mode 100644 index 00000000..75709df8 --- /dev/null +++ b/docs/generated-html/search/variables_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_c.js b/docs/generated-html/search/variables_c.js new file mode 100644 index 00000000..4774d499 --- /dev/null +++ b/docs/generated-html/search/variables_c.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['params',['params',['../structcutlass_1_1gemm_1_1Gemm.html#a3c292637ab0ec8e73856d0cf6efb6da2',1,'cutlass::gemm::Gemm::params()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a81b028a18df51d3caa1b0ba0c990e362',1,'cutlass::gemm::GemmEpilogue::params()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ab8c79cb1a8157dd00429c93cb4a41322',1,'cutlass::gemm::GemmGlobalIteratorAb::params()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ac368b1ea1c5ad2209a6ac6bec597600f',1,'cutlass::gemm::GemmGlobalIteratorCd::params()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a0ad4218ad2c10641379b236473e79e84',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::params()'],['../structcutlass_1_1TileLoadIterator.html#aaafe35622751532971c1b7efc54c888b',1,'cutlass::TileLoadIterator::params()'],['../structcutlass_1_1TileStoreIterator.html#a5e6c00b99e0f752137b07f7059f6ee0f',1,'cutlass::TileStoreIterator::params()']]], + 
['pointer',['pointer',['../structcutlass_1_1FragmentIterator.html#af667793926cdb24d701eb75e0345bbd6',1,'cutlass::FragmentIterator::pointer()'],['../structcutlass_1_1FragmentConstIterator.html#aee37f8ea06127b94a304bb776945509b',1,'cutlass::FragmentConstIterator::pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#ad764f98e770d4685006e6888214dcd4d',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::pointer()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#aa42c4e7419308926b925909e6a5c719d',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::pointer()'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#a6608f7027994aaebdefd004fe94153d9',1,'cutlass::TileLoadIterator::Params::pointer()'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#a6bbadae6b13aef8f31a77cacd88b068b',1,'cutlass::TileStoreIterator::Params::pointer()']]], + ['predicate_5finc_5fadvance',['predicate_inc_advance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a2b5d2b02d241e89677c41eb658ace129',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#aa0367d016549cce6bd896bae364fc248',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_inc_advance()']]], + ['predicate_5finc_5fh',['predicate_inc_h',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a36afe18f94aacd0746c8946866371d3c',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a5b8177a936ba30a3d68ca238aaf76ff6',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_inc_h()']]], + 
['predicate_5foffset',['predicate_offset',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a3e8f6cf08d23318f3e3263b55cf3b84a',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_offset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a38f13119cf3111e84914f1bef6f5d985',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_offset()']]], + ['predicates',['predicates',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#af323c9db74f0de3376edd35eb377bc9c',1,'cutlass::gemm::GemmGlobalIteratorAb::predicates()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ad23e6224e37ec1d13dc237ce8ec6e977',1,'cutlass::gemm::GemmGlobalIteratorCd::predicates()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af3c9d62554b1d311d82ba89e09cdd3fa',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::predicates()']]] +]; diff --git a/docs/generated-html/search/variables_d.html b/docs/generated-html/search/variables_d.html new file mode 100644 index 00000000..34c80a48 --- /dev/null +++ b/docs/generated-html/search/variables_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_d.js b/docs/generated-html/search/variables_d.js new file mode 100644 index 00000000..c72e1539 --- /dev/null +++ b/docs/generated-html/search/variables_d.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['rank',['Rank',['../classcutlass_1_1TensorRef.html#a22ac53a60e63a743613e732586ad0c66',1,'cutlass::TensorRef::Rank()'],['../classcutlass_1_1TensorView.html#a22c39e8cf314884c5d523914cf4cac90',1,'cutlass::TensorView::Rank()']]], + ['registers',['registers',['../unioncutlass_1_1Vector.html#a29dab07949206cc1609543ffcefd1e5a',1,'cutlass::Vector::registers()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#abd116dc7a5b82ac9b1481fb1d2bfc93f',1,'cutlass::Vector< half, kLanes_ >::registers()']]] +]; diff --git a/docs/generated-html/search/variables_e.html b/docs/generated-html/search/variables_e.html new file mode 100644 index 00000000..4a1c8a61 --- /dev/null +++ b/docs/generated-html/search/variables_e.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_e.js b/docs/generated-html/search/variables_e.js new file mode 100644 index 00000000..0f47e549 --- /dev/null +++ b/docs/generated-html/search/variables_e.js @@ -0,0 +1,19 @@ +var searchData= +[ + ['scalars',['scalars',['../unioncutlass_1_1Vector.html#a091080b4e9db9e89734f44ceb985d78f',1,'cutlass::Vector::scalars()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#ab4a119a4813f80aa10c25e32f8b115f3',1,'cutlass::Vector< half, kLanes_ >::scalars()']]], + ['shared',['shared',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html#afabd328b106d45b156200f73942d211e',1,'cutlass::gemm::GemmTraits::StreamSharedStorage']]], + ['shared_5fload_5fiterator_5fd',['shared_load_iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a1742e43c128665f0ca39cb578291df81',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['shared_5fstorage',['shared_storage',['../structcutlass_1_1gemm_1_1Gemm.html#a6b0119ed8d92698dab4de68987c8cc1b',1,'cutlass::gemm::Gemm::shared_storage()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a442b5b5688cd658c3b3476650c00281e',1,'cutlass::gemm::GemmEpilogue::shared_storage()']]], + ['shared_5fstore_5fiterator_5fd',['shared_store_iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#af79a0c74a4c30ccec59b393721b5dfc1',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['shared_5fstream',['shared_stream',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html#ae63b5a52106dbd37ea304196335ec210',1,'cutlass::gemm::GemmEpilogueTraits::SharedStorage']]], + ['shared_5fstream_5fa',['shared_stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aa9937ec51d18aad02398d95095117978',1,'cutlass::gemm::GemmTraits::Params']]], + ['shared_5fstream_5fb',['shared_stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a78f22007632937bbd5f3dab7b097477d',1,'cutlass::gemm::GemmTraits::Params']]], + 
['stage',['stage',['../structcutlass_1_1TileLoadIterator.html#aa3fd9859de68d76e07ebee06c6ccee92',1,'cutlass::TileLoadIterator::stage()'],['../structcutlass_1_1TileStoreIterator.html#ae435b72b15eca46eb871446d92bd316e',1,'cutlass::TileStoreIterator::stage()']]], + ['store',['store',['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html#a1f31090613c4e6f0895f598880d6c4e5',1,'cutlass::gemm::GemmEpilogueTraits::StreamSharedStorage']]], + ['store_5fiterator',['store_iterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a3e5167fa3f2dc0d8b4b903bd4e936969',1,'cutlass::gemm::GlobalLoadStreamBase::Params::store_iterator()'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html#a939e9ddecc5ee97882a54211a61f5586',1,'cutlass::gemm::GlobalLoadStreamBase::SharedStorage::store_iterator()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0eafd1e245946bd1b9d228ad7d2d0dae',1,'cutlass::gemm::GlobalLoadStreamBase::store_iterator()']]], + ['stream_5fa',['stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a62d3dcf5d97a0a896b2033e55dfb0811',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage::stream_a()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a82a59524b5d3134eb609d280193a5c47',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::stream_a()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a8e68561561ac6b08efbfd116903198c8',1,'cutlass::gemm::GemmTraits::SharedLoadStream::stream_a()']]], + 
['stream_5fb',['stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a0173fcc8856b17a52cc5eee845f101fa',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage::stream_b()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#acc287ce5e2f3635d9d55d91914d2d04c',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::stream_b()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a1fdc6af44c14c88a94529d187fda176d',1,'cutlass::gemm::GemmTraits::SharedLoadStream::stream_b()']]], + ['stride_5fd',['stride_d',['../structcutlass_1_1TileIteratorBase_1_1Params.html#ad67234ec264354a22032bb2519575dc1',1,'cutlass::TileIteratorBase::Params']]], + ['stride_5fh',['stride_h',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#ae0fdc7426b22ff2c20f077e251ebc823',1,'cutlass::gemm::GemmEpilogueTraits::Params::stride_h()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a0c6b03c635e14ad4424a83f8c7f8025e',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::stride_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a5cff0436eed0fefa2957ad6d083ed007',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::stride_h()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a58e8c883aea4cfdfa5a84c25a4704ebc',1,'cutlass::TileIteratorBase::Params::stride_h()']]], + ['stride_5fw',['stride_w',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a565f6cab8925d632dcf24bd1974caca2',1,'cutlass::gemm::GemmEpilogueTraits::Params::stride_w()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a313984457c78eea66c980f6813047b9c',1,'cutlass::TileIteratorBase::Params::stride_w()']]] +]; diff --git a/docs/generated-html/search/variables_f.html b/docs/generated-html/search/variables_f.html new file mode 100644 index 00000000..cc86fb59 --- /dev/null +++ b/docs/generated-html/search/variables_f.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_f.js b/docs/generated-html/search/variables_f.js new file mode 100644 index 00000000..834a84b7 --- /dev/null +++ b/docs/generated-html/search/variables_f.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['thread_5foffset',['thread_offset',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a1864c5556529afdc8445021cad780b04',1,'cutlass::gemm::GemmGlobalIteratorAb::thread_offset()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a56601dc34e8f9a070db5dc48c37d55a0',1,'cutlass::gemm::GemmGlobalIteratorCd::thread_offset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab3057dad7a4decb5594c66aa328f8066',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::thread_offset()'],['../structcutlass_1_1TileLoadIterator.html#a7726cdd4fe056c59bb04adb9e5504457',1,'cutlass::TileLoadIterator::thread_offset()'],['../structcutlass_1_1TileStoreIterator.html#a350f5beea87d811f43c55519bc0b9035',1,'cutlass::TileStoreIterator::thread_offset()']]], + ['transformed_5fa',['transformed_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a883b28ca237b1ec076856232cfee0c6f',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['transformed_5fb',['transformed_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a9369a5f819d2a42997491e0df96f47ef',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['transformed_5ffragment',['transformed_fragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afa97cb1cfebca0d6977b1c8318bedddf',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['transformer',['transformer',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a868f82ee87aba37b05721fe8210221c9',1,'cutlass::gemm::GlobalLoadStreamBase::transformer()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#af846390ad0e5b80ccb4e8b95c5fe64a7',1,'cutlass::gemm::SharedLoadStream::transformer()']]] +]; diff --git a/docs/generated-html/sgemm__traits_8h.html b/docs/generated-html/sgemm__traits_8h.html new 
file mode 100644 index 00000000..4dd76bfc --- /dev/null +++ b/docs/generated-html/sgemm__traits_8h.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: sgemm_traits.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    sgemm_traits.h File Reference
    +
    + + + + + diff --git a/docs/generated-html/sgemm__traits_8h_source.html b/docs/generated-html/sgemm__traits_8h_source.html new file mode 100644 index 00000000..965c5497 --- /dev/null +++ b/docs/generated-html/sgemm__traits_8h_source.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: sgemm_traits.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    sgemm_traits.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/gemm/gemm.h>
    37 
    38 namespace cutlass {
    39 namespace gemm {
    40 
    42 
    43 template <
    45  typename OutputTile_,
    47  typename AccumulatorsPerThread_,
    49  int kScalarsPerLdgA_ = 1,
    51  int kScalarsPerLdgB_ = 1>
    53  : public GemmConfig<
    55  float,
    57  float,
    59  float,
    61  float,
    63  OutputTile_,
    65  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, float, float, float>,
    67  kScalarsPerLdgA_,
    69  kScalarsPerLdgA_,
    71  4,
    73  kScalarsPerLdgB_,
    75  kScalarsPerLdgB_,
    77  4,
    79  1,
    81  4,
    83  1,
    85  2> {};
    86 
    88 
    89 template <
    91  MatrixLayout::Kind kLayoutA_,
    93  MatrixLayout::Kind kLayoutB_,
    95  typename OutputTile_ = Shape<8, 128, 128>,
    97  typename EpilogueFunctor_ = LinearScaling<float>,
    99  typename AccumulatorsPerThread_ = Shape<8, 8, 8>,
    101  int kScalarsPerLdgA_ = 1,
    103  int kScalarsPerLdgB_ = 1,
    105  typename Index_ = int,
    107  typename GemmConfig_ =
    110  typename GemmEpilogueTraits_ =
    113  // The layout for A.
    114  kLayoutA_,
    115  // The layout for B.
    116  kLayoutB_,
    117  // The config.
    118  GemmConfig_,
    119  // The epilogue.
    120  GemmEpilogue<GemmEpilogueTraits_>,
    121  // The index.
    122  Index_> {};
    123 
    125 
    126 } // namespace gemm
    127 } // namespace cutlass
    Definition: convert.h:33
    +
    Defines iterators for efficiently loading and storing to global memory.
    +
    Defines structural properties of complete GEMM computation.
    +
    Definition: sgemm_traits.h:52
    +
    Template implementing matrix multiply-add operations on fragments.
    +
    Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
    +
    Defines iterators for efficiently loading and storing tiles to and from shared memory.
    +
    Definition: gemm_traits.h:79
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    Definition: gemm_epilogue_traits.h:300
    +
    Kind
    Definition: matrix_traits.h:36
    +
    Definition: sgemm_traits.h:112
    +
    Functor to compute linear combination of fragments.
    Definition: linear_scaling.h:40
    +
    Implements a software-pipelined efficient GEMM.
    +
    Defines structural properties of the GEMM epilogue.
    +
    Definition: gemm_traits.h:723
    +
    + + + + diff --git a/docs/generated-html/shape_8h.html b/docs/generated-html/shape_8h.html new file mode 100644 index 00000000..483edeeb --- /dev/null +++ b/docs/generated-html/shape_8h.html @@ -0,0 +1,154 @@ + + + + + + + +Cutlass: shape.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    shape.h File Reference
    +
    +
    + +

    Defines Shape implementing the Layout concept for representing a 4D hypercube of objects. +More...

    +
    #include <cutlass/cutlass.h>
    +
    +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::Shape< kD_, kH_, kW_, kC_ >
     A Shape implementing Layout Concept describing the dimensions of a cube. More...
     
    struct  cutlass::ShapeCount< Shape >
     Compute derived counts of a Layout Concept based class. More...
     
    struct  cutlass::ShapeScale< A_, kScale_ >
     
    struct  cutlass::ShapeAdd< A_, B_ >
     
    struct  cutlass::ShapeSub< A_, B_ >
     
    struct  cutlass::ShapeMul< A_, B_ >
     
    struct  cutlass::ShapeDiv< A_, B_ >
     
    struct  cutlass::ShapeMax< A_, B_ >
     
    struct  cutlass::ShapeMin< A_, B_ >
     
    struct  cutlass::ShapeStrides< Shape_ >
     
    struct  cutlass::ComputeOffsetFromShape< Shape_ >
     Compute the offset for the given coordinates in a cube. More...
     
    struct  cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >
     Compute the offset for the given coordinates in a cube with a depth of 1. More...
     
    struct  cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >
     Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
     
    struct  cutlass::ComputeOffsetFromStrides< Strides_ >
     Compute the offset for the given coordinates in a cube. More...
     
    struct  cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >
     Compute the offset for the given coordinates in a cube with a depth of 1. More...
     
    struct  cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >
     Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
     
    struct  cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >
     Decompose threadIdx.x into the coordinates of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_. More...
     
    struct  cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >
     Specialization for D=1. More...
     
    struct  cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >
     Specialization for D=1 and C=1. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/shape_8h_source.html b/docs/generated-html/shape_8h_source.html new file mode 100644 index 00000000..5b980198 --- /dev/null +++ b/docs/generated-html/shape_8h_source.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: shape.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    shape.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/cutlass.h>
    31 
    32 namespace cutlass {
    33 
    35 
    63 template <int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
    64 struct Shape {
    66  static int const kD = kD_;
    68  static int const kH = kH_;
    70  static int const kW = kW_;
    72  static int const kC = kC_;
    73 };
    74 
    78 template <typename Shape>
    79 struct ShapeCount {
    81  static int const kWc = Shape::kW * Shape::kC;
    83  static int const kHw = Shape::kH * Shape::kW;
    85  static int const kHwc = Shape::kH * kWc;
    87  static int const kDhw = Shape::kD * kHw;
    89  static int const kDhwc = Shape::kD * kHwc;
    91  static int const kCount = kDhwc;
    92 };
    93 
    95 
    96 template <typename A_, int kScale_>
    97 struct ShapeScale {
    99 };
    100 
    102 
    103 template <typename A_, typename B_>
    104 struct ShapeAdd {
    106 };
    107 
    109 
    110 template <typename A_, typename B_>
    111 struct ShapeSub {
    112  typedef Shape<A_::kD - B_::kD, A_::kH - B_::kH, A_::kW - B_::kW, A_::kC - B_::kC> Shape;
    113 };
    114 
    116 
    117 template <typename A_, typename B_>
    118 struct ShapeMul {
    120 };
    121 
    123 
    124 template <typename A_, typename B_>
    125 struct ShapeDiv {
    126  typedef Shape<A_::kD / B_::kD, A_::kH / B_::kH, A_::kW / B_::kW, A_::kC / B_::kC> Shape;
    127 };
    128 
    130 
    131 template <typename A_, typename B_>
    132 struct ShapeMax {
    133  typedef Shape<(A_::kD > B_::kD ? A_::kD : B_::kD),
    134  (A_::kH > B_::kH ? A_::kH : B_::kH),
    135  (A_::kW > B_::kW ? A_::kW : B_::kW),
    136  (A_::kC > B_::kC ? A_::kC : B_::kC)>
    138 };
    139 
    141 
    142 template <typename A_, typename B_>
    143 struct ShapeMin {
    144  typedef Shape<(A_::kD < B_::kD ? A_::kD : B_::kD),
    145  (A_::kH < B_::kH ? A_::kH : B_::kH),
    146  (A_::kW < B_::kW ? A_::kW : B_::kW),
    147  (A_::kC < B_::kC ? A_::kC : B_::kC)>
    149 };
    150 
    152 
    153 template <typename Shape_>
    154 struct ShapeStrides {
    156 };
    157 
    159 
    164 template <typename Shape_>
    166  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
    167  // clang-format off
    168  return d * Shape_::kH * Shape_::kW * Shape_::kC +
    169  h * Shape_::kW * Shape_::kC +
    170  w * Shape_::kC +
    171  c;
    172  // clang-format on
    173  }
    174 };
    175 
    177 
    184 template <int kSh_, int kSw_, int kSc_>
    185 struct ComputeOffsetFromShape<Shape<1, kSh_, kSw_, kSc_> > {
    186  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
    187  return h * kSw_ * kSc_ + w * kSc_ + c;
    188  }
    189 };
    190 
    192 
    198 template <int kSh_, int kSw_>
    199 struct ComputeOffsetFromShape<Shape<1, kSh_, kSw_, 1> > {
    200  static CUTLASS_DEVICE int get(int d, int h, int w, int c) { return h * kSw_ + w; }
    201 };
    202 
    204 
    209 template <typename Strides_>
    211  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
    212  return d * Strides_::kD + h * Strides_::kH + w * Strides_::kW + c * Strides_::kC;
    213  }
    214 };
    215 
    217 
    224 template <int S_h_, int S_w_, int S_c_>
    225 struct ComputeOffsetFromStrides<Shape<1, S_h_, S_w_, S_c_> > {
    226  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
    227  return h * S_h_ + w * S_w_ + c * S_c_;
    228  }
    229 };
    230 
    232 
    238 template <int S_h_, int S_w_>
    239 struct ComputeOffsetFromStrides<Shape<1, S_h_, S_w_, 1> > {
    240  static CUTLASS_DEVICE int get(int d, int h, int w, int c) { return h * S_h_ + w * S_w_; }
    241 };
    242 
    244 
    251 template <typename Threads_, typename Strides_>
    253  static CUTLASS_DEVICE int get() {
    254  // Decompose the thread index.
    255  int c = threadIdx.x % Threads_::kC;
    256  int w = threadIdx.x / Threads_::kC % Threads_::kW;
    257  int h = threadIdx.x / Threads_::kC / Threads_::kW % Threads_::kH;
    258  int d = threadIdx.x / Threads_::kC / Threads_::kW / Threads_::kH;
    259 
    260  // Compute the offset.
    261  return d * Strides_::kD + h * Strides_::kH + w * Strides_::kW + c * Strides_::kC;
    262  }
    263 };
    264 
    266 
    269 template <int T_h_, int T_w_, int T_c_, int S_h_, int S_w_, int S_c_>
    270 struct ComputeThreadOffsetFromStrides<Shape<1, T_h_, T_w_, T_c_>, Shape<1, S_h_, S_w_, S_c_> > {
    271  static CUTLASS_DEVICE int get() {
    272  // Decompose the thread index.
    273  int c = threadIdx.x % T_c_;
    274  int w = threadIdx.x / T_c_ % T_w_;
    275  int h = threadIdx.x / T_c_ / T_w_ % T_h_;
    276 
    277  // Compute the offset.
    278  return h * S_h_ + w * S_w_ + c * S_c_;
    279  }
    280 };
    281 
    283 
    287 template <int T_h_, int T_w_, int S_h_, int S_w_>
    288 struct ComputeThreadOffsetFromStrides<Shape<1, T_h_, T_w_, 1>, Shape<1, S_h_, S_w_, 1> > {
    289  static CUTLASS_DEVICE int get() {
    290  // Decompose the thread index.
    291  int w = threadIdx.x % T_w_;
    292  int h = threadIdx.x / T_w_;
    293 
    294  // Compute the offset.
    295  return h * S_h_ + w * S_w_;
    296  }
    297 };
    298 
    300 
    301 } // namespace cutlass
    Decompose threadIdx.x into the coordinates of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_.
    Definition: shape.h:252
    +
    static int const kWc
    The number of elements per row.
    Definition: shape.h:81
    +
    Definition: convert.h:33
    +
    Shape< A_::kD+B_::kD, A_::kH+B_::kH, A_::kW+B_::kW, A_::kC+B_::kC > Shape
    Definition: shape.h:105
    +
    Shape< A_::kD *kScale_, A_::kH *kScale_, A_::kW *kScale_, A_::kC *kScale_ > Shape
    Definition: shape.h:98
    +
    Shape< Shape_::kH *Shape_::kW *Shape_::kC, Shape_::kW *Shape_::kC, Shape_::kC, 1 > Shape
    Definition: shape.h:155
    +
    Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
    Definition: shape.h:119
    +
    Shape< A_::kD - B_::kD, A_::kH - B_::kH, A_::kW - B_::kW, A_::kC - B_::kC > Shape
    Definition: shape.h:112
    +
    Definition: shape.h:111
    +
    static int const kH
    The height of the cube.
    Definition: shape.h:68
    +
    static int const kC
    The number of scalars per element.
    Definition: shape.h:72
    +
    Definition: shape.h:97
    +
    Compute the offset for the given coordinates in a cube.
    Definition: shape.h:165
    +
    Shape< A_::kD/B_::kD, A_::kH/B_::kH, A_::kW/B_::kW, A_::kC/B_::kC > Shape
    Definition: shape.h:126
    +
    static int const kDhw
    The number of pixels per cube.
    Definition: shape.h:87
    +
    Definition: shape.h:118
    +
    Definition: shape.h:125
    +
    Compute the offset for the given coordinates in a cube.
    Definition: shape.h:210
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    Definition: shape.h:132
    +
    Definition: shape.h:104
    +
    static int const kCount
    The number of elements in the 4D space.
    Definition: shape.h:91
    +
    static int const kDhwc
    The number of elements in the 4D space.
    Definition: shape.h:89
    +
    static int const kW
    The width of the cube.
    Definition: shape.h:70
    +
    Definition: shape.h:143
    +
    static int const kHw
    The number of pixels per image.
    Definition: shape.h:83
    +
    static int const kD
    The depth of the cube.
    Definition: shape.h:66
    +
    Definition: shape.h:154
    +
    Shape<(A_::kD > B_::kD ? A_::kD :B_::kD),(A_::kH > B_::kH ? A_::kH :B_::kH),(A_::kW > B_::kW ? A_::kW :B_::kW),(A_::kC > B_::kC ? A_::kC :B_::kC)> Shape
    Definition: shape.h:137
    +
    Basic include for CUTLASS macros.
    +
    Shape<(A_::kD< B_::kD ? A_::kD :B_::kD),(A_::kH< B_::kH ? A_::kH :B_::kH),(A_::kW< B_::kW ? A_::kW :B_::kW),(A_::kC< B_::kC ? A_::kC :B_::kC)> Shape
    Definition: shape.h:148
    +
    Compute derived counts of a Layout Concept based class.
    Definition: shape.h:79
    +
    static int const kHwc
    The number of elements per image.
    Definition: shape.h:85
    +
    + + + + diff --git a/docs/generated-html/splitbar.png b/docs/generated-html/splitbar.png new file mode 100644 index 00000000..b9c27ab3 Binary files /dev/null and b/docs/generated-html/splitbar.png differ diff --git a/docs/generated-html/structcutlass_1_1AlignedStruct.html b/docs/generated-html/structcutlass_1_1AlignedStruct.html new file mode 100644 index 00000000..b7df247d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1AlignedStruct.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: cutlass::AlignedStruct< kAlignment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::AlignedStruct< kAlignment_ > Struct Template Reference
    +
    +
    + +

    #include <vector.h>

    +
    +Inheritance diagram for cutlass::AlignedStruct< kAlignment_ >:
    +
    +
    + + +cutlass::Fragment< Element_, kElements_, kAlignment_ > + +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1AlignedStruct.png b/docs/generated-html/structcutlass_1_1AlignedStruct.png new file mode 100644 index 00000000..1cdbb00d Binary files /dev/null and b/docs/generated-html/structcutlass_1_1AlignedStruct.png differ diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape-members.html new file mode 100644 index 00000000..c31427b5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromShape< Shape_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromShape< Shape_ >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromShape< Shape_ >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape.html new file mode 100644 index 00000000..709f76ee --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape.html @@ -0,0 +1,165 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromShape< Shape_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromShape< Shape_ > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<typename Shape_>
    +struct cutlass::ComputeOffsetFromShape< Shape_ >

    + +
    Template Parameters
    + + +
    ALayout Concept where each dimension of the cube specifies the corresponding stride.
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<typename Shape_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromShape< Shape_ >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4-members.html new file mode 100644 index 00000000..5d6fc097 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html new file mode 100644 index 00000000..406a86ab --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html @@ -0,0 +1,166 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube with one channel and a depth of 1. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<int kSh_, int kSw_>
    +struct cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >

    + +
    Template Parameters
    + + + +
    kShElements in the H dimension
    kSwElements in the W dimension
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int kSh_, int kSw_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4-members.html new file mode 100644 index 00000000..9824b8c4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html new file mode 100644 index 00000000..096cdc80 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html @@ -0,0 +1,167 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube with a depth of 1. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<int kSh_, int kSw_, int kSc_>
    +struct cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >

    + +
    Template Parameters
    + + + + +
    kSh: Elements in the H dimension
    kSw: Elements in the W dimension
    kSc: Separation between two elements in "elements"
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int kSh_, int kSw_, int kSc_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides-members.html new file mode 100644 index 00000000..369de9ff --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromStrides< Strides_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromStrides< Strides_ >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromStrides< Strides_ >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides.html new file mode 100644 index 00000000..5c3254d1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides.html @@ -0,0 +1,165 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromStrides< Strides_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromStrides< Strides_ > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<typename Strides_>
    +struct cutlass::ComputeOffsetFromStrides< Strides_ >

    + +
    Template Parameters
    + + +
    Strides_: A Layout Concept where each dimension of the cube specifies the corresponding stride.
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<typename Strides_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromStrides< Strides_ >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4-members.html new file mode 100644 index 00000000..f37d8307 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html new file mode 100644 index 00000000..a12a5aaf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html @@ -0,0 +1,166 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube with one channel and a depth of 1. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<int S_h_, int S_w_>
    +struct cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >

    + +
    Template Parameters
    + + + +
    S_h: Stride in the H dimension in scalars
    S_w: Stride in the W dimension in scalars
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int S_h_, int S_w_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4-members.html new file mode 100644 index 00000000..2f54eda3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html new file mode 100644 index 00000000..be85a7c8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html @@ -0,0 +1,167 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube with a depth of 1. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<int S_h_, int S_w_, int S_c_>
    +struct cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >

    + +
    Template Parameters
    + + + + +
    S_h: Stride in the H dimension in scalars
    S_w: Stride in the W dimension in scalars
    S_c: Stride between two scalars.
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int S_h_, int S_w_, int S_c_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides-members.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides-members.html new file mode 100644 index 00000000..104116b7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >, including all inherited members.

    + + +
    get()cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides.html new file mode 100644 index 00000000..d434e920 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ > Struct Template Reference
    +
    +
    + +

    Decompose threadIdx.x into the coordinates of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get ()
     
    +

    Detailed Description

    +

    template<typename Threads_, typename Strides_>
    +struct cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >

    + +
    Template Parameters
    + + + +
    Threads_: The dimension of the cube the threadIdx.x value is mapped on
    Strides_: The strides to use when computing the offsets based on the coordinates of the cube.
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<typename Threads_ , typename Strides_ >
    + + + + + +
    + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >::get ()
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_03ed682791cf043da79a7cc93228a8c85.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_03ed682791cf043da79a7cc93228a8c85.html new file mode 100644 index 00000000..be0dd197 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_03ed682791cf043da79a7cc93228a8c85.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html new file mode 100644 index 00000000..55392ee2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > Struct Template Reference
    +
    +
    + +

    Specialization for D=1 and C=1. +

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get ()
     
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int T_h_, int T_w_, int S_h_, int S_w_>
    + + + + + +
    + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >::get ()
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html new file mode 100644 index 00000000..7a2a0294 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > Struct Template Reference
    +
    +
    + +

    Specialization for D=1. +

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get ()
     
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int T_h_, int T_w_, int T_c_, int S_h_, int S_w_, int S_c_>
    + + + + + +
    + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >::get ()
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__f2e6d84a53db391977c787a65ed62aca.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__f2e6d84a53db391977c787a65ed62aca.html new file mode 100644 index 00000000..80afb2f9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__f2e6d84a53db391977c787a65ed62aca.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter-members.html b/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter-members.html new file mode 100644 index 00000000..12404785 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter.html b/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter.html new file mode 100644 index 00000000..6e00d30b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter.html @@ -0,0 +1,231 @@ + + + + + + + +Cutlass: cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ > Struct Template Reference
    +
    +
    + +

    Adapter to enable random access to predicates via logical coordinate within a tile. +

    + +

    #include <predicate_vector.h>

    + + + + + + + + +

    +Public Types

    typedef PredicateVector_ PredicateVector
     The vector of predicates. More...
     
    typedef Iterations_ Iterations
     The iterations. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE ConstPredicateTileAdapter (PredicateVector const &predicates_)
     Ctor. More...
     
    CUTLASS_DEVICE bool at (int d, int h, int w, int c) const
     Get the value at location (d, h, w, c). More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + +
    typedef Iterations_ cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >::Iterations
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + +
    typedef PredicateVector_ cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >::PredicateVector
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ ConstPredicateTileAdapter()

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >::ConstPredicateTileAdapter (PredicateVector const & predicates_)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at()

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >::at (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Convert.html b/docs/generated-html/structcutlass_1_1Convert.html new file mode 100644 index 00000000..5b2e67b9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Convert.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::Convert< InputFragment_, OutputFragment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Convert< InputFragment_, OutputFragment_ > Struct Template Reference
    +
    +
    + +

    #include <convert.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ob568b5e19b6f78a5fa50d1f821f0bc2a.html b/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ob568b5e19b6f78a5fa50d1f821f0bc2a.html new file mode 100644 index 00000000..559a16fc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ob568b5e19b6f78a5fa50d1f821f0bc2a.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html b/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html new file mode 100644 index 00000000..daf9a756 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html @@ -0,0 +1,265 @@ + + + + + + + +Cutlass: cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > Struct Template Reference
    +
    +
    + +

    #include <convert.h>

    + + + + + + + + +

    +Public Types

    typedef Fragment< InputScalar_, kScalars_ > InputFragment
     The input fragment. More...
     
    typedef Fragment< OutputScalar_, kScalars_ > OutputFragment
     The output fragment. More...
     
    + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE Convert ()
     Ctor. More...
     
    CUTLASS_DEVICE void transform (InputFragment const &src, OutputFragment &dst)
     Transform a fragment. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void transform (Fragment_ const &src, int offset, OutputFragment &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename InputScalar_ , typename OutputScalar_ , int kScalars_>
    + + + + +
    typedef Fragment<InputScalar_, kScalars_> cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<typename InputScalar_ , typename OutputScalar_ , int kScalars_>
    + + + + +
    typedef Fragment<OutputScalar_, kScalars_> cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ Convert()

    + +
    +
    +
    +template<typename InputScalar_ , typename OutputScalar_ , int kScalars_>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::Convert ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform() [1/2]

    + +
    +
    +
    +template<typename InputScalar_ , typename OutputScalar_ , int kScalars_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform (InputFragment const & src,
    OutputFragment & dst 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ transform() [2/2]

    + +
    +
    +
    +template<typename InputScalar_ , typename OutputScalar_ , int kScalars_>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform (Fragment_ const & src,
    int offset,
    OutputFragment & dst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Coord-members.html b/docs/generated-html/structcutlass_1_1Coord-members.html new file mode 100644 index 00000000..a8348bb1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Coord-members.html @@ -0,0 +1,114 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Coord< N_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Coord< N_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + +
    at()cutlass::Coord< N_ >inline
    at(int dim)cutlass::Coord< N_ >inline
    at() constcutlass::Coord< N_ >inline
    at(int dim) constcutlass::Coord< N_ >inline
    clamp(Coord< N > const &max, Coord< N > const &min=Coord< N >())cutlass::Coord< N_ >inline
    Coord(int value=0)cutlass::Coord< N_ >inline
    Coord(int _idx[])cutlass::Coord< N_ >inline
    count() constcutlass::Coord< N_ >inline
    dot(Coord const &b, T sum) constcutlass::Coord< N_ >inline
    dot(Coord const &b) constcutlass::Coord< N_ >inline
    idxcutlass::Coord< N_ >
    Ncutlass::Coord< N_ >static
    operator!=(Coord< N > const &b) constcutlass::Coord< N_ >inline
    operator*(Coord const &b) constcutlass::Coord< N_ >inline
    operator*=(Coord const &b)cutlass::Coord< N_ >inline
    operator+(Coord const &b) constcutlass::Coord< N_ >inline
    operator+=(Coord const &b)cutlass::Coord< N_ >inline
    operator-(Coord const &b) constcutlass::Coord< N_ >inline
    operator-=(Coord const &b)cutlass::Coord< N_ >inline
    operator/(Coord const &b) constcutlass::Coord< N_ >inline
    operator/=(Coord const &b)cutlass::Coord< N_ >inline
    operator==(Coord< N > const &b) constcutlass::Coord< N_ >inline
    operator[](int dim)cutlass::Coord< N_ >inline
    operator[](int dim) constcutlass::Coord< N_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Coord.html b/docs/generated-html/structcutlass_1_1Coord.html new file mode 100644 index 00000000..18094b37 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Coord.html @@ -0,0 +1,869 @@ + + + + + + + +Cutlass: cutlass::Coord< N_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Coord< N_ > Struct Template Reference
    +
    +
    + +

    Statically-sized array specifying Coords within a tensor. +

    + +

    #include <coord.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord (int value=0)
     Default ctor initializes uniformly. More...
     
    CUTLASS_HOST_DEVICE Coord (int _idx[])
     Constructs from an array of integers. More...
     
    CUTLASS_HOST_DEVICE Coord operator+ (Coord const &b) const
     Element-wise addition. More...
     
    CUTLASS_HOST_DEVICE Coord operator- (Coord const &b) const
     Element-wise subtraction. More...
     
    CUTLASS_HOST_DEVICE Coord operator* (Coord const &b) const
     Element-wise multiplication. More...
     
    CUTLASS_HOST_DEVICE Coord operator/ (Coord const &b) const
     Element-wise division. More...
     
    CUTLASS_HOST_DEVICE Coord & operator+= (Coord const &b)
     In-place addition. More...
     
    CUTLASS_HOST_DEVICE Coord & operator-= (Coord const &b)
     In-place subtraction. More...
     
    CUTLASS_HOST_DEVICE Coord & operator*= (Coord const &b)
     In-place multiplication. More...
     
    CUTLASS_HOST_DEVICE Coord & operator/= (Coord const &b)
     In-place division. More...
     
    CUTLASS_HOST_DEVICE int & operator[] (int dim)
     Member access operator. More...
     
    CUTLASS_HOST_DEVICE int const & operator[] (int dim) const
     Member access operator. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE T dot (Coord const &b, T sum) const
     Computes the dot product of two Coord instances. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE T dot (Coord const &b) const
     Computes the dot product of two Coord instances. More...
     
    template<int Dim>
    CUTLASS_HOST_DEVICE int & at ()
     Gets the index of a given Coord element. More...
     
    CUTLASS_HOST_DEVICE int & at (int dim)
     Access via index; may limit unrolling potential. More...
     
    template<int Dim>
    CUTLASS_HOST_DEVICE int const & at () const
     Gets the index of a given Coord element. More...
     
    CUTLASS_HOST_DEVICE int const & at (int dim) const
     Access via index; may limit unrolling potential. More...
     
    CUTLASS_HOST_DEVICE bool operator== (Coord< N > const &b) const
     Determines if two Coord<> objects are equal. More...
     
    CUTLASS_HOST_DEVICE bool operator!= (Coord< N > const &b) const
     Not equal. More...
     
    CUTLASS_HOST_DEVICE Coord & clamp (Coord< N > const &max, Coord< N > const &min=Coord< N >())
     Clamps a coordinate to a range specified by maximum and minimum values. More...
     
    CUTLASS_HOST_DEVICE int count () const
     Returns the product of all elements. More...
     
    + + + + +

    +Public Attributes

    int idx [N]
     Indices. More...
     
    + + + +

    +Static Public Attributes

    static int const N = N_
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ Coord() [1/2]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::Coord< N_ >::Coord (int value = 0)
    +
    +inline
    +
    + +
    +
    + +

    ◆ Coord() [2/2]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::Coord< N_ >::Coord (int _idx[])
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at() [1/4]

    + +
    +
    +
    +template<int N_>
    +
    +template<int Dim>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int& cutlass::Coord< N_ >::at ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ at() [2/4]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int& cutlass::Coord< N_ >::at (int dim)
    +
    +inline
    +
    + +
    +
    + +

    ◆ at() [3/4]

    + +
    +
    +
    +template<int N_>
    +
    +template<int Dim>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int const& cutlass::Coord< N_ >::at () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ at() [4/4]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int const& cutlass::Coord< N_ >::at (int dim) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ clamp()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord& cutlass::Coord< N_ >::clamp (Coord< N > const & max,
    Coord< N > const & min = Coord<N>() 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ count()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::Coord< N_ >::count () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ dot() [1/2]

    + +
    +
    +
    +template<int N_>
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE T cutlass::Coord< N_ >::dot (Coord< N_ > const & b,
    T sum 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ dot() [2/2]

    + +
    +
    +
    +template<int N_>
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE T cutlass::Coord< N_ >::dot (Coord< N_ > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator!=()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::Coord< N_ >::operator!= (Coord< N > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator*()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord cutlass::Coord< N_ >::operator* (Coord< N_ > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator*=()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord& cutlass::Coord< N_ >::operator*= (Coord< N_ > const & b)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator+()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord cutlass::Coord< N_ >::operator+ (Coord< N_ > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator+=()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord& cutlass::Coord< N_ >::operator+= (Coord< N_ > const & b)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator-()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord cutlass::Coord< N_ >::operator- (Coord< N_ > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator-=()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord& cutlass::Coord< N_ >::operator-= (Coord< N_ > const & b)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator/()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord cutlass::Coord< N_ >::operator/ (Coord< N_ > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator/=()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord& cutlass::Coord< N_ >::operator/= (Coord< N_ > const & b)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator==()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::Coord< N_ >::operator== (Coord< N > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [1/2]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int& cutlass::Coord< N_ >::operator[] (int dim)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [2/2]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int const& cutlass::Coord< N_ >::operator[] (int dim) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ idx

    + +
    +
    +
    +template<int N_>
    + + + + +
    int cutlass::Coord< N_ >::idx[N]
    +
    + +
    +
    + +

    ◆ N

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + +
    int const cutlass::Coord< N_ >::N = N_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Copy-members.html b/docs/generated-html/structcutlass_1_1Copy-members.html new file mode 100644 index 00000000..42d212fc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Copy-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Copy< Fragment_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Copy< Fragment_ >, including all inherited members.

    + + + + + + +
    Copy()cutlass::Copy< Fragment_ >inline
    InputFragment typedefcutlass::Copy< Fragment_ >
    OutputFragment typedefcutlass::Copy< Fragment_ >
    transform(Fragment_ const &src, Fragment_ &dst)cutlass::Copy< Fragment_ >inline
    transform(InputFragment_ const &src, int offset, Fragment_ &dst)cutlass::Copy< Fragment_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Copy.html b/docs/generated-html/structcutlass_1_1Copy.html new file mode 100644 index 00000000..65da8b81 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Copy.html @@ -0,0 +1,265 @@ + + + + + + + +Cutlass: cutlass::Copy< Fragment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Copy< Fragment_ > Struct Template Reference
    +
    +
    + +

    #include <convert.h>

    + + + + + + + + +

    +Public Types

    typedef Fragment_ InputFragment
     The input fragment. More...
     
    typedef Fragment_ OutputFragment
     The output fragment. More...
     
    + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE Copy ()
     Ctor. More...
     
    CUTLASS_DEVICE void transform (Fragment_ const &src, Fragment_ &dst)
     Transform a fragment. More...
     
    template<typename InputFragment_ >
    CUTLASS_DEVICE void transform (InputFragment_ const &src, int offset, Fragment_ &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + +
    typedef Fragment_ cutlass::Copy< Fragment_ >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + +
    typedef Fragment_ cutlass::Copy< Fragment_ >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ Copy()

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::Copy< Fragment_ >::Copy ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform() [1/2]

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::Copy< Fragment_ >::transform (Fragment_ const & src,
    Fragment_ & dst 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ transform() [2/2]

    + +
    +
    +
    +template<typename Fragment_ >
    +
    +template<typename InputFragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::Copy< Fragment_ >::transform (InputFragment_ const & src,
    int offset,
    Fragment_ & dst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent-members.html b/docs/generated-html/structcutlass_1_1Extent-members.html new file mode 100644 index 00000000..f0a499a6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Extent< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Extent< T >, including all inherited members.

    + + +
    kValuecutlass::Extent< T >static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent.html b/docs/generated-html/structcutlass_1_1Extent.html new file mode 100644 index 00000000..0efd6f83 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::Extent< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Extent< T > Struct Template Reference
    +
    +
    + +

    Returns the extent of a scalar or vector. +

    + +

    #include <vector.h>

    + + + + +

    +Static Public Attributes

    static size_t const kValue = 1
     
    +

    Member Data Documentation

    + +

    ◆ kValue

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + +
    size_t const cutlass::Extent< T >::kValue = 1
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html new file mode 100644 index 00000000..f9f4108b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Extent< Vector< T, Lanes > > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Extent< Vector< T, Lanes > >, including all inherited members.

    + + +
    kValuecutlass::Extent< Vector< T, Lanes > >static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html new file mode 100644 index 00000000..70599166 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::Extent< Vector< T, Lanes > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Extent< Vector< T, Lanes > > Struct Template Reference
    +
    +
    + +

    Returns the number of lanes of a vector if need be. +

    + +

    #include <vector.h>

    + + + + +

    +Static Public Attributes

    static size_t const kValue = Lanes
     
    +

    Member Data Documentation

    + +

    ◆ kValue

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    size_t const cutlass::Extent< Vector< T, Lanes > >::kValue = Lanes
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html new file mode 100644 index 00000000..7c95798e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Extent< Vector< T, Lanes > const > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Extent< Vector< T, Lanes > const >, including all inherited members.

    + + +
    kValuecutlass::Extent< Vector< T, Lanes > const >static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html new file mode 100644 index 00000000..6afa4a19 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::Extent< Vector< T, Lanes > const > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Extent< Vector< T, Lanes > const > Struct Template Reference
    +
    +
    + +

    Returns the number of lanes of a vector if need be. +

    + +

    #include <vector.h>

    + + + + +

    +Static Public Attributes

    static size_t const kValue = Lanes
     
    +

    Member Data Documentation

    + +

    ◆ kValue

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    size_t const cutlass::Extent< Vector< T, Lanes > const >::kValue = Lanes
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Fragment-members.html b/docs/generated-html/structcutlass_1_1Fragment-members.html new file mode 100644 index 00000000..b6b050c0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Fragment-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Fragment< Element_, kElements_, kAlignment_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1Fragment.html b/docs/generated-html/structcutlass_1_1Fragment.html new file mode 100644 index 00000000..640994f8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Fragment.html @@ -0,0 +1,280 @@ + + + + + + + +Cutlass: cutlass::Fragment< Element_, kElements_, kAlignment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Fragment< Element_, kElements_, kAlignment_ > Struct Template Reference
    +
    +
    + +

    A template defining Fragment Concept. +

    + +

    #include <fragment.h>

    +
    +Inheritance diagram for cutlass::Fragment< Element_, kElements_, kAlignment_ >:
    +
    +
    + + +cutlass::AlignedStruct< kAlignment_ > + +
    + + + + + + + + +

    +Public Types

    typedef Fragment< Element_, kElements_ > This_
     Make sure the alignment makes sense wrt the size of elements. More...
     
    typedef Element_ Element
     The element. More...
     
    + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE void clear ()
     Clear a fragment. More...
     
    CUTLASS_DEVICE Element & operator[] (int i)
     The accessor. More...
     
    CUTLASS_DEVICE Element const & operator[] (int i) const
     The accessor. More...
     
    + + + + +

    +Static Public Attributes

    static int const kElements = kElements_
     The number of elements. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Element

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + +
    typedef Element_ cutlass::Fragment< Element_, kElements_, kAlignment_ >::Element
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + +
    typedef Fragment<Element_, kElements_> cutlass::Fragment< Element_, kElements_, kAlignment_ >::This_
    +
    +

    Alignment must be a power of two. This class.

    + +
    +
    +

    Member Function Documentation

    + +

    ◆ clear()

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::Fragment< Element_, kElements_, kAlignment_ >::clear ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [1/2]

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Element& cutlass::Fragment< Element_, kElements_, kAlignment_ >::operator[] (int i)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [2/2]

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Element const& cutlass::Fragment< Element_, kElements_, kAlignment_ >::operator[] (int i) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kElements

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + + +
    + + + + +
    int const cutlass::Fragment< Element_, kElements_, kAlignment_ >::kElements = kElements_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Fragment.png b/docs/generated-html/structcutlass_1_1Fragment.png new file mode 100644 index 00000000..c4bfbfc4 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1Fragment.png differ diff --git a/docs/generated-html/structcutlass_1_1FragmentConstIterator-members.html b/docs/generated-html/structcutlass_1_1FragmentConstIterator-members.html new file mode 100644 index 00000000..0de6feda --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentConstIterator-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >, including all inherited members.

    + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    at(int d, int h, int w, int c=0) constcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >inline
    Element typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    Fragment typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    FragmentConstIterator(OtherFragment_ &fragment, int offset=0)cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >inline
    FragmentConstIterator(FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >inline
    FragmentShape typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    Iterations typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    IterationsStrides typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    kElementsPerAccesscutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >static
    operator[](int i) constcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >inline
    pointercutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    This_ typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    valid(int d, int h, int w, int c) constcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentConstIterator.html b/docs/generated-html/structcutlass_1_1FragmentConstIterator.html new file mode 100644 index 00000000..6a02e2f1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentConstIterator.html @@ -0,0 +1,506 @@ + + + + + + + +Cutlass: cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ > Struct Template Reference
    +
    +
    + +

    #include <fragment.h>

    + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
     This class. More...
     
    typedef Fragment_ Fragment
     The fragment. More...
     
    typedef Iterations_ Iterations
     The number of iterations. More...
     
    typedef AccessType_ AccessType
     The access type. More...
     
    typedef Fragment::Element Element
     The element. More...
     
    typedef ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
     The shape of the fragment. More...
     
    typedef ShapeStrides< FragmentShape >::Shape IterationsStrides
     The linear strides for iterations. More...
     
    + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    template<typename OtherFragment_ >
    CUTLASS_DEVICE FragmentConstIterator (OtherFragment_ &fragment, int offset=0)
     Ctor. More...
     
    CUTLASS_DEVICE FragmentConstIterator (FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)
     Create from non-constant FragmentIterator. More...
     
    CUTLASS_DEVICE AccessType const & at (int d, int h, int w, int c=0) const
     The accessor. More...
     
    CUTLASS_DEVICE AccessType const & operator[] (int i) const
     The accessor. More...
     
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + +

    +Public Attributes

    Element const * pointer
     The pointer. More...
     
    + + + + +

    +Static Public Attributes

    static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element))
     The number of elements per access. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef AccessType_ cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::AccessType
    +
    + +
    +
    + +

    ◆ Element

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef Fragment::Element cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::Element
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef Fragment_ cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef ShapeMul<Iterations, Shape<1, 1, 1, kElementsPerAccess> >::Shape cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef Iterations_ cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::Iterations
    +
    + +
    +
    + +

    ◆ IterationsStrides

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef ShapeStrides<FragmentShape>::Shape cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::IterationsStrides
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef FragmentIterator<Fragment_, Iterations_, AccessType_> cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::This_
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ FragmentConstIterator() [1/2]

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    +
    +template<typename OtherFragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::FragmentConstIterator (OtherFragment_ & fragment,
    int offset = 0 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ FragmentConstIterator() [2/2]

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::FragmentConstIterator (FragmentIterator< Fragment_, Iterations_, AccessType_ > const & rhs_)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at()

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE AccessType const& cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::at (int d,
    int h,
    int w,
    int c = 0 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]()

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE AccessType const& cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::operator[] (int i) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kElementsPerAccess

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + + +
    + + + + +
    int const cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element))
    +
    +static
    +
    + +
    +
    + +

    ◆ pointer

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    Element const* cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::pointer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentIterator-members.html b/docs/generated-html/structcutlass_1_1FragmentIterator-members.html new file mode 100644 index 00000000..eba645fe --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentIterator-members.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >, including all inherited members.

    + + + + + + + + + + + + + + + + +
    AccessType typedef  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    at(int d, int h, int w, int c=0) const  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >  inline
    at(int d, int h, int w, int c=0)  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >  inline
    Element typedef  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    Fragment typedef  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    FragmentIterator(OtherFragment_ &fragment, int offset=0)  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >  inline
    FragmentShape typedef  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    Iterations typedef  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    kElementsPerAccess  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >  static
    operator[](int i) const  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >  inline
    operator[](int i)  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >  inline
    pointer  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    Strides typedef  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    This_ typedef  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    valid(int d, int h, int w, int c) const  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >  inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentIterator.html b/docs/generated-html/structcutlass_1_1FragmentIterator.html new file mode 100644 index 00000000..3bd495f6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentIterator.html @@ -0,0 +1,562 @@ + + + + + + + +Cutlass: cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ > Struct Template Reference
    +
    +
    + +

    A template defining Fragment Iterator Concept. +

    + +

    #include <fragment.h>

    + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
     This class. More...
     
    typedef Fragment_ Fragment
     The fragment. More...
     
    typedef Iterations_ Iterations
     The number of iterations. More...
     
    typedef AccessType_ AccessType
     The access type. More...
     
    typedef Fragment::Element Element
     The element. More...
     
    typedef ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
     The shape of the fragment. More...
     
    typedef ShapeStrides< FragmentShape >::Shape Strides
     The linear strides for iterations. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    template<typename OtherFragment_ >
    CUTLASS_DEVICE FragmentIterator (OtherFragment_ &fragment, int offset=0)
     Ctor. More...
     
    CUTLASS_DEVICE AccessType const & at (int d, int h, int w, int c=0) const
     The accessor. More...
     
    CUTLASS_DEVICE AccessType & at (int d, int h, int w, int c=0)
     The accessor. More...
     
    CUTLASS_DEVICE AccessType const & operator[] (int i) const
     The accessor. More...
     
    CUTLASS_DEVICE AccessType & operator[] (int i)
     The accessor. More...
     
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + +

    +Public Attributes

    Element * pointer
     The pointer. More...
     
    + + + + +

    +Static Public Attributes

    static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element))
     The number of elements per access. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef AccessType_ cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::AccessType
    +
    + +
    +
    + +

    ◆ Element

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef Fragment::Element cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::Element
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef Fragment_ cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef ShapeMul<Iterations, Shape<1, 1, 1, kElementsPerAccess> >::Shape cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef Iterations_ cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::Iterations
    +
    + +
    +
    + +

    ◆ Strides

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef ShapeStrides<FragmentShape>::Shape cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::Strides
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef FragmentIterator<Fragment_, Iterations_, AccessType_> cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::This_
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ FragmentIterator()

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    +
    +template<typename OtherFragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::FragmentIterator (OtherFragment_ & fragment,
    int offset = 0 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at() [1/2]

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE AccessType const& cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::at (int d,
    int h,
    int w,
    int c = 0 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ at() [2/2]

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE AccessType& cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::at (int d,
    int h,
    int w,
    int c = 0 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [1/2]

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE AccessType const& cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::operator[] (int i) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [2/2]

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE AccessType& cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::operator[] (int i)
    +
    +inline
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kElementsPerAccess

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + +
    int const cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element))
    +
    +static
    +
    + +
    +
    + +

    ◆ pointer

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    Element* cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::pointer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentLoad.html b/docs/generated-html/structcutlass_1_1FragmentLoad.html new file mode 100644 index 00000000..634c7f08 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentLoad.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::FragmentLoad< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentLoad< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_29bcae86cc02cb793583fe6b659e7a83.html b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_29bcae86cc02cb793583fe6b659e7a83.html new file mode 100644 index 00000000..13d9dfac --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_29bcae86cc02cb793583fe6b659e7a83.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html new file mode 100644 index 00000000..f0d09eca --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, kAccessSize >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &value, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + +
    typedef Vectorize<Scalar_, kAccessSize>::Type cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load (AccessType & value,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__1ca6d6e2bd7dd222c0b3a77a665e36fe.html b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__1ca6d6e2bd7dd222c0b3a77a665e36fe.html new file mode 100644 index 00000000..ee593a7a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__1ca6d6e2bd7dd222c0b3a77a665e36fe.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html new file mode 100644 index 00000000..eb1e4637 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    + + + + + +

    +Public Types

    typedef FragmentElement_ AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &value, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + +
    typedef FragmentElement_ cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load (AccessType & value,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStore.html b/docs/generated-html/structcutlass_1_1FragmentStore.html new file mode 100644 index 00000000..762e8139 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStore.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::FragmentStore< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentStore< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0039852e55b713e99520c56b76ce64b290.html b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0039852e55b713e99520c56b76ce64b290.html new file mode 100644 index 00000000..36e1c183 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0039852e55b713e99520c56b76ce64b290.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html new file mode 100644 index 00000000..fbbd941b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, kAccessSize >::Type AccessType
     The input type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &value, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + +
    typedef Vectorize<Scalar_, kAccessSize>::Type cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store (AccessType const & value,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html new file mode 100644 index 00000000..ed4e0fbc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    + + + + + +

    +Public Types

    typedef FragmentElement_ AccessType
     The input type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &value, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + +
    typedef FragmentElement_ cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store (AccessType const & value,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_dea9a5a5c980336e8c43a15909be3cdb.html b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_dea9a5a5c980336e8c43a15909be3cdb.html new file mode 100644 index 00000000..f2869ed0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_dea9a5a5c980336e8c43a15909be3cdb.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStream-members.html b/docs/generated-html/structcutlass_1_1FragmentStream-members.html new file mode 100644 index 00000000..fd69efbd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStream-members.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + +
    commit()  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >  inline
    Convert typedef  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    convert  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    fetch  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    Fragment typedef  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    FragmentStream()  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >  inline
    FragmentStream(Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >  inline
    Index typedef  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    initialize_predicates(Coord< 3 > const &bounds, Coord< 3 > const &block_offset)  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >  inline
    load()  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >  inline
    load_iterator  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    LoadIterator typedef  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    predicates  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    shared_store_fence()  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >  inline static
    SharedStoreStorage typedef  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    Storage typedef  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    store_iterator  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    StoreFragment typedef  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    StoreIterator typedef  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    Traits typedef  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStream.html b/docs/generated-html/structcutlass_1_1FragmentStream.html new file mode 100644 index 00000000..8e249098 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStream.html @@ -0,0 +1,598 @@ + + + + + + + +Cutlass: cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ > Struct Template Reference
    +
    +
    + +

    Manages a pair of iterators to stream data from global memory to shared. +

    + +

    #include <fragment_stream.h>

    + + + + + +

    +Classes

    struct  Params
     Parameters passed to initialize the iterator. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Traits_ Traits
     Defines traits of WMMA GEMM tile stream. More...
     
    typedef LoadIterator_ LoadIterator
     Defines the load iterator. More...
     
    typedef StoreIterator_ StoreIterator
     Defines the store iterator. More...
     
    typedef Convert_ Convert
     Converts between tiles. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef LoadIterator::Fragment Fragment
     Loaded fragment type. More...
     
    typedef StoreIterator::Fragment StoreFragment
     Stored fragment type. More...
     
    typedef StoreIterator::Storage Storage
     Destination storage. More...
     
    typedef StoreIterator::Storage SharedStoreStorage
     The storage in shared memory. More...
     
    + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE FragmentStream ()
     
    CUTLASS_DEVICE FragmentStream (Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Constructor. More...
     
    CUTLASS_DEVICE void load ()
     Loads the fragment. More...
     
    CUTLASS_DEVICE void commit ()
     Commits the fragment. More...
     
    CUTLASS_DEVICE void initialize_predicates (Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
     Recomputes predicates. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    + + + + + + + + + + + + + + + + +

    +Public Attributes

    LoadIterator load_iterator
     Loads fragment from global memory. More...
     
    LoadIterator::PredicateVector predicates
     Predicate vector. More...
     
    StoreIterator store_iterator
     Stores fragment to shared memory. More...
     
    Fragment fetch
     Fragment fetched by load iterator. More...
     
    Convert convert
     Converts between load fragments and store fragments. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Convert

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef Convert_ cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Convert
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef LoadIterator::Fragment cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Fragment
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef Index_ cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Index
    +
    + +
    +
    + +

    ◆ LoadIterator

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef LoadIterator_ cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::LoadIterator
    +
    + +
    +
    + +

    ◆ SharedStoreStorage

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef StoreIterator::Storage cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::SharedStoreStorage
    +
    + +
    +
    + +

    ◆ Storage

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef StoreIterator::Storage cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Storage
    +
    + +
    +
    + +

    ◆ StoreFragment

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef StoreIterator::Fragment cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::StoreFragment
    +
    + +
    +
    + +

    ◆ StoreIterator

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef StoreIterator_ cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::StoreIterator
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef Traits_ cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ FragmentStream() [1/2]

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::FragmentStream ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ FragmentStream() [2/2]

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::FragmentStream (Params const & params,
    Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0) 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::initialize_predicates (Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ load()

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::load ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ shared_store_fence()

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + +
    static CUTLASS_DEVICE void cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::shared_store_fence ()
    +
    +inlinestatic
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ convert

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    Convert cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::convert
    +
    + +
    +
    + +

    ◆ fetch

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    Fragment cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::fetch
    +
    + +
    +
    + +

    ◆ load_iterator

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    LoadIterator cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::load_iterator
    +
    + +
    +
    + +

    ◆ predicates

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    LoadIterator::PredicateVector cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::predicates
    +
    + +
    +
    + +

    ◆ store_iterator

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    StoreIterator cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::store_iterator
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params-members.html b/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params-members.html new file mode 100644 index 00000000..e629def9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params.html b/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params.html new file mode 100644 index 00000000..a8708366 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params.html @@ -0,0 +1,230 @@ + + + + + + + +Cutlass: cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params Struct Reference
    +
    +
    + +

    Parameters passed to initialize the iterator. +

    + +

    #include <fragment_stream.h>

    + + + + + + + + +

    +Public Types

    typedef LoadIterator::Params LoadParams
     Load parameters. More...
     
    typedef StoreIterator::Params StoreParams
     Store parameters. More...
     
    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (LoadParams const &_load_params, StoreParams const &_store_params)
     Initializes parameters. More...
     
    + + + + + + + +

    +Public Attributes

    LoadParams load_params
     Parameters to load iterator. More...
     
    StoreParams store_params
     Parameters to the store iterator. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ LoadParams

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef LoadIterator::Params cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params::LoadParams
    +
    + +
    +
    + +

    ◆ StoreParams

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef StoreIterator::Params cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params::StoreParams
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params::initialize (LoadParams const & _load_params,
    StoreParams const & _store_params 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ load_params

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    LoadParams cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params::load_params
    +
    + +
    +
    + +

    ◆ store_params

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    StoreParams cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params::store_params
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1GemmOperand-members.html b/docs/generated-html/structcutlass_1_1GemmOperand-members.html new file mode 100644 index 00000000..1f0d6f48 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1GemmOperand-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::GemmOperand Member List
    +
    +
    + +

    This is the complete list of members for cutlass::GemmOperand, including all inherited members.

    + + + + + + +
    kA enum valuecutlass::GemmOperand
    kB enum valuecutlass::GemmOperand
    kC enum valuecutlass::GemmOperand
    kD enum valuecutlass::GemmOperand
    Kind enum namecutlass::GemmOperand
    + + + + diff --git a/docs/generated-html/structcutlass_1_1GemmOperand.html b/docs/generated-html/structcutlass_1_1GemmOperand.html new file mode 100644 index 00000000..b97ab479 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1GemmOperand.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::GemmOperand Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::GemmOperand Struct Reference
    +
    +
    + +

    Gemm operand - D = A * B + C. +

    + +

    #include <matrix_traits.h>

    + + + + +

    +Public Types

    enum  Kind { kA, +kB, +kC, +kD + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + + +
    enum cutlass::GemmOperand::Kind
    +
    + + + + + +
    Enumerator
    kA 
    kB 
    kC 
    kD 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Identity-members.html b/docs/generated-html/structcutlass_1_1Identity-members.html new file mode 100644 index 00000000..7d06d432 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Identity-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Identity Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Identity, including all inherited members.

    + + + + +
    Additive enum valuecutlass::Identity
    Kind enum namecutlass::Identity
    Multiplicative enum valuecutlass::Identity
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Identity.html b/docs/generated-html/structcutlass_1_1Identity.html new file mode 100644 index 00000000..1629a334 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Identity.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: cutlass::Identity Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Identity Struct Reference
    +
    +
    + +

    Describes identity elements. +

    + +

    #include <coord.h>

    + + + + +

    +Public Types

    enum  Kind { Additive = 0, +Multiplicative = 1 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + + +
    enum cutlass::Identity::Kind
    +
    +

    Enumeration describing identity elements. Value assignments are significant. Feel free to add or multiply by these, respectively.

    + + + +
    Enumerator
    Additive 
    Multiplicative 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1IteratorAdvance-members.html b/docs/generated-html/structcutlass_1_1IteratorAdvance-members.html new file mode 100644 index 00000000..b7e004e4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1IteratorAdvance-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::IteratorAdvance Member List
    +
    +
    + +

    This is the complete list of members for cutlass::IteratorAdvance, including all inherited members.

    + + + + + +
    kD enum valuecutlass::IteratorAdvance
    kH enum valuecutlass::IteratorAdvance
    Kind enum namecutlass::IteratorAdvance
    kW enum valuecutlass::IteratorAdvance
    + + + + diff --git a/docs/generated-html/structcutlass_1_1IteratorAdvance.html b/docs/generated-html/structcutlass_1_1IteratorAdvance.html new file mode 100644 index 00000000..91a9d3bc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1IteratorAdvance.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: cutlass::IteratorAdvance Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::IteratorAdvance Struct Reference
    +
    +
    + +

    Specifies dimension in which post-increment accesses advance. +

    + +

    #include <tile_iterator.h>

    + + + + +

    +Public Types

    enum  Kind { kD, +kH, +kW + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + + +
    Enumerator
    kD 
    kH 
    kW 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1IteratorFragment-members.html b/docs/generated-html/structcutlass_1_1IteratorFragment-members.html new file mode 100644 index 00000000..2ae9833d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1IteratorFragment-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::IteratorFragment Member List
    +
    +
    + +

    This is the complete list of members for cutlass::IteratorFragment, including all inherited members.

    + + + + +
    Kind enum namecutlass::IteratorFragment
    kScalar enum valuecutlass::IteratorFragment
    kWmmaMatrix enum valuecutlass::IteratorFragment
    + + + + diff --git a/docs/generated-html/structcutlass_1_1IteratorFragment.html b/docs/generated-html/structcutlass_1_1IteratorFragment.html new file mode 100644 index 00000000..f02ab2c9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1IteratorFragment.html @@ -0,0 +1,125 @@ + + + + + + + +Cutlass: cutlass::IteratorFragment Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::IteratorFragment Struct Reference
    +
    +
    + +

    Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. +

    + +

    #include <tile_iterator.h>

    + + + + +

    +Public Types

    enum  Kind { kScalar, +kWmmaMatrix + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + +
    Enumerator
    kScalar 
    kWmmaMatrix 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load-members.html b/docs/generated-html/structcutlass_1_1Load-members.html new file mode 100644 index 00000000..f977a385 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >
    load(AccessType &dst, Scalar_ const *pointer, int offset)cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load.html b/docs/generated-html/structcutlass_1_1Load.html new file mode 100644 index 00000000..e3640b25 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &dst, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_, bool = (Lanes_ > 1), size_t = (sizeof(Scalar_) * Lanes_)>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_, bool = (Lanes_ > 1), size_t = (sizeof(Scalar_) * Lanes_)>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >::load (AccessType & dst,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html new file mode 100644 index 00000000..98b54d31 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >
    load(AccessType &dst, Scalar_ const *pointer, int offset)cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html new file mode 100644 index 00000000..c7036f1d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &dst, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::load (AccessType & dst,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html new file mode 100644 index 00000000..5e3d4f23 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >
    load(AccessType &dst, Scalar_ const *pointer, int offset)cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html new file mode 100644 index 00000000..432e4a03 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &dst, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >::load (AccessType & dst,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html new file mode 100644 index 00000000..9b93f91b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >
    load(AccessType &dst, Scalar_ const *pointer, int offset)cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html new file mode 100644 index 00000000..021b3f7c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &dst, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::load (AccessType & dst,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html b/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html new file mode 100644 index 00000000..599c7dbe --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Load< double, 2, Memory_, true, 16 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Load< double, 2, Memory_, true, 16 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Load< double, 2, Memory_, true, 16 >
    load(AccessType &dst, double const *pointer, int offset)cutlass::Load< double, 2, Memory_, true, 16 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html b/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html new file mode 100644 index 00000000..7afbc80a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Load< double, 2, Memory_, true, 16 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Load< double, 2, Memory_, true, 16 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< double, 2 >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &dst, double const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<double, 2>::Type cutlass::Load< double, 2, Memory_, true, 16 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Load< double, 2, Memory_, true, 16 >::load (AccessType & dst,
    double const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1MatrixLayout-members.html b/docs/generated-html/structcutlass_1_1MatrixLayout-members.html new file mode 100644 index 00000000..9a6cfd74 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1MatrixLayout-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::MatrixLayout Member List
    +
    +
    + +

    This is the complete list of members for cutlass::MatrixLayout, including all inherited members.

    + + + + +
    kColumnMajor enum valuecutlass::MatrixLayout
    Kind enum namecutlass::MatrixLayout
    kRowMajor enum valuecutlass::MatrixLayout
    + + + + diff --git a/docs/generated-html/structcutlass_1_1MatrixLayout.html b/docs/generated-html/structcutlass_1_1MatrixLayout.html new file mode 100644 index 00000000..1a79f4d1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1MatrixLayout.html @@ -0,0 +1,125 @@ + + + + + + + +Cutlass: cutlass::MatrixLayout Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::MatrixLayout Struct Reference
    +
    +
    + +

    Describes layouts of matrices. +

    + +

    #include <matrix_traits.h>

    + + + + +

    +Public Types

    enum  Kind { kRowMajor, +kColumnMajor + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + + +
    enum cutlass::MatrixLayout::Kind
    +
    + + + +
    Enumerator
    kRowMajor 
    kColumnMajor 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1MemorySpace-members.html b/docs/generated-html/structcutlass_1_1MemorySpace-members.html new file mode 100644 index 00000000..22af2209 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1MemorySpace-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::MemorySpace Member List
    +
    +
    + +

    This is the complete list of members for cutlass::MemorySpace, including all inherited members.

    + + + + + +
    kGeneric enum valuecutlass::MemorySpace
    kGlobal enum valuecutlass::MemorySpace
    Kind enum namecutlass::MemorySpace
    kShared enum valuecutlass::MemorySpace
    + + + + diff --git a/docs/generated-html/structcutlass_1_1MemorySpace.html b/docs/generated-html/structcutlass_1_1MemorySpace.html new file mode 100644 index 00000000..410826bf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1MemorySpace.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: cutlass::MemorySpace Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::MemorySpace Struct Reference
    +
    +
    + +

    Enum to specify which memory space data resides in. +

    + +

    #include <load_store.h>

    + + + + +

    +Public Types

    enum  Kind { kGeneric, +kShared, +kGlobal + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + + +
    enum cutlass::MemorySpace::Kind
    +
    + + + + +
    Enumerator
    kGeneric 
    kShared 
    kGlobal 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateTileAdapter-members.html b/docs/generated-html/structcutlass_1_1PredicateTileAdapter-members.html new file mode 100644 index 00000000..f064207c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateTileAdapter-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateTileAdapter.html b/docs/generated-html/structcutlass_1_1PredicateTileAdapter.html new file mode 100644 index 00000000..0dd3d00a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateTileAdapter.html @@ -0,0 +1,290 @@ + + + + + + + +Cutlass: cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ > Struct Template Reference
    +
    +
    + +

    Adapter to enable random access to predicates via logical coordinate within a tile. +

    + +

    #include <predicate_vector.h>

    + + + + + + + + +

    +Public Types

    typedef PredicateVector_ PredicateVector
     The vector of predicates. More...
     
    typedef Iterations_ Iterations
     The iterations. More...
     
    + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE PredicateTileAdapter (PredicateVector &predicates_)
     Ctor. More...
     
    CUTLASS_DEVICE bool at (int d, int h, int w, int c) const
     Get the value at location (d, h, w, c). More...
     
    CUTLASS_DEVICE void set (int d, int h, int w, int c, bool value)
     Set the value at location (d, h, w, c). More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + +
    typedef Iterations_ cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >::Iterations
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + +
    typedef PredicateVector_ cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >::PredicateVector
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ PredicateTileAdapter()

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >::PredicateTileAdapter (PredicateVector & predicates_)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at()

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >::at (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ set()

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >::set (int d,
    int h,
    int w,
    int c,
    bool value 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateVector-members.html b/docs/generated-html/structcutlass_1_1PredicateVector-members.html new file mode 100644 index 00000000..b4475cc1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateVector-members.html @@ -0,0 +1,108 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + +
    at(int idx) constcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    begin()cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    const_begin() constcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    const_end() constcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    end()cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    fill(bool value=true)cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    is_zero() constcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    kBytescutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >static
    kPredicatescutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >static
    kPredicatesPerBytecutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >static
    kPredicateStartcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >static
    kWordCountcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >static
    operator &=(PredicateVector const &predicates)cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    operator[](int idx) constcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    operator|=(PredicateVector const &predicates)cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    PredicateVector(bool value=true)cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    set(int idx, bool value=true)cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    Storage typedefcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateVector.html b/docs/generated-html/structcutlass_1_1PredicateVector.html new file mode 100644 index 00000000..43645c57 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateVector.html @@ -0,0 +1,658 @@ + + + + + + + +Cutlass: cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ > Struct Template Reference
    +
    +
    + +

    Statically sized array of bits implementing the Predicate Vector Concept. +

    + +

    #include <predicate_vector.h>

    + + + + + + + + + + + +

    +Classes

    class  ConstIterator
     A const iterator implementing Predicate Iterator Concept enabling sequential read-only access to predicates. More...
     
    class  Iterator
     An iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates. More...
     
    struct  TrivialIterator
     Iterator that always returns true. More...
     
    + + + + +

    +Public Types

    typedef uint32_t Storage
     Storage type of individual elements. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE PredicateVector (bool value=true)
     Initialize the predicate vector. More...
     
    CUTLASS_HOST_DEVICE void fill (bool value=true)
     Fills all predicates with a given value. More...
     
    CUTLASS_HOST_DEVICE bool operator[] (int idx) const
     Accesses a bit within the predicate vector. More...
     
    CUTLASS_HOST_DEVICE bool at (int idx) const
     Accesses a bit within the predicate vector. More...
     
    CUTLASS_HOST_DEVICE void set (int idx, bool value=true)
     Set a bit within the predicate vector. More...
     
    CUTLASS_HOST_DEVICE PredicateVector & operator&= (PredicateVector const &predicates)
     Computes the intersection of two identical predicate vectors. More...
     
    CUTLASS_HOST_DEVICE PredicateVector & operator|= (PredicateVector const &predicates)
     Computes the union of two identical predicate vectors. More...
     
    CUTLASS_HOST_DEVICE bool is_zero () const
     Returns true if entire predicate array is zero. More...
     
    CUTLASS_DEVICE Iterator begin ()
     Returns an iterator to the start of the bit vector. More...
     
    CUTLASS_DEVICE Iterator end ()
     Returns an iterator. More...
     
    CUTLASS_DEVICE ConstIterator const_begin () const
     Returns a ConstIterator. More...
     
    CUTLASS_DEVICE ConstIterator const_end () const
     Returns a ConstIterator. More...
     
    + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kPredicates = kPredicates_
     Number of bits stored by the PredicateVector. More...
     
    static int const kPredicatesPerByte = kPredicatesPerByte_
     Number of bits stored within each byte of the predicate bit vector. More...
     
    static int const kPredicateStart = kPredicateStart_
     First bit within each byte containing predicates. More...
     
    static int const kBytes = (kPredicates + kPredicatesPerByte - 1) / kPredicatesPerByte
     Number of bytes needed. More...
     
    static int const kWordCount = (kBytes + sizeof(Storage) - 1) / sizeof(Storage)
     Number of storage elements needed. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Storage

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + +
    typedef uint32_t cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Storage
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ PredicateVector()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::PredicateVector (bool value = true)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::at (int idx) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ begin()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE Iterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::begin ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ const_begin()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE ConstIterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::const_begin () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ const_end()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE ConstIterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::const_end () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ end()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE Iterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::end ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ fill()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::fill (bool value = true)
    +
    +inline
    +
    + +
    +
    + +

    ◆ is_zero()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::is_zero () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator &=()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE PredicateVector& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::operator&= (PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ > const & predicates)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::operator[] (int idx) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator|=()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE PredicateVector& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::operator|= (PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ > const & predicates)
    +
    +inline
    +
    + +
    +
    + +

    ◆ set()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::set (int idx,
    bool value = true 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kBytes

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::kBytes = (kPredicates + kPredicatesPerByte - 1) / kPredicatesPerByte
    +
    +static
    +
    + +
    +
    + +

    ◆ kPredicates

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::kPredicates = kPredicates_
    +
    +static
    +
    + +
    +
    + +

    ◆ kPredicatesPerByte

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::kPredicatesPerByte = kPredicatesPerByte_
    +
    +static
    +
    + +
    +
    + +

    ◆ kPredicateStart

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::kPredicateStart = kPredicateStart_
    +
    +static
    +
    + +
    +
    + +

    ◆ kWordCount

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::kWordCount = (kBytes + sizeof(Storage) - 1) / sizeof(Storage)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator-members.html b/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator-members.html new file mode 100644 index 00000000..d2f4a9c3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator.html b/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator.html new file mode 100644 index 00000000..4e008feb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator.html @@ -0,0 +1,287 @@ + + + + + + + +Cutlass: cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator Struct Reference
    +
    +
    + +

    Iterator that always returns true. +

    + +

    #include <predicate_vector.h>

    + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE TrivialIterator ()
     Constructor. More...
     
    CUTLASS_HOST_DEVICE TrivialIterator (Iterator const &it)
     Copy constructor. More...
     
    CUTLASS_HOST_DEVICE TrivialIterator (PredicateVector const &_vec)
     Constructs an iterator from a PredicateVector. More...
     
    CUTLASS_HOST_DEVICE TrivialIterator & operator++ ()
     Pre-increment. More...
     
    CUTLASS_HOST_DEVICE TrivialIterator operator++ (int)
     Post-increment. More...
     
    CUTLASS_HOST_DEVICE bool operator* () const
     Dereferences iterator. More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ TrivialIterator() [1/3]

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::TrivialIterator ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ TrivialIterator() [2/3]

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::TrivialIterator (Iterator const & it)
    +
    +inline
    +
    + +
    +
    + +

    ◆ TrivialIterator() [3/3]

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::TrivialIterator (PredicateVector const & _vec)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ operator*()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::operator* () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator++() [1/2]

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE TrivialIterator& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::operator++ ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator++() [2/2]

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE TrivialIterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::operator++ (int )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ReshapeTile-members.html b/docs/generated-html/structcutlass_1_1ReshapeTile-members.html new file mode 100644 index 00000000..03567994 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ReshapeTile-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ReshapeTile< Tile_, kAccessSize_, bool > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ReshapeTile< Tile_, kAccessSize_, bool >, including all inherited members.

    + + +
    Tile typedefcutlass::ReshapeTile< Tile_, kAccessSize_, bool >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ReshapeTile.html b/docs/generated-html/structcutlass_1_1ReshapeTile.html new file mode 100644 index 00000000..936510b3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ReshapeTile.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ReshapeTile< Tile_, kAccessSize_, bool > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ReshapeTile< Tile_, kAccessSize_, bool > Struct Template Reference
    +
    +
    + +

    #include <reshape_tile.h>

    + + + + +

    +Public Types

    typedef Tile_ Tile
     
    +

    Member Typedef Documentation

    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_, int kAccessSize_, bool = (Tile_::kC < kAccessSize_)>
    + + + + +
    typedef Tile_ cutlass::ReshapeTile< Tile_, kAccessSize_, bool >::Tile
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4-members.html b/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4-members.html new file mode 100644 index 00000000..649f85d4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ReshapeTile< Tile_, kAccessSize_, true > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ReshapeTile< Tile_, kAccessSize_, true >, including all inherited members.

    + + +
    Tile typedefcutlass::ReshapeTile< Tile_, kAccessSize_, true >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html b/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html new file mode 100644 index 00000000..e7acf274 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ReshapeTile< Tile_, kAccessSize_, true > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ReshapeTile< Tile_, kAccessSize_, true > Struct Template Reference
    +
    +
    + +

    #include <reshape_tile.h>

    + + + + +

    +Public Types

    typedef Shape< Tile_::kD, Tile_::kH, Tile_::kW/kAccessSize_, kAccessSize_ > Tile
     
    +

    Member Typedef Documentation

    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , int kAccessSize_>
    + + + + +
    typedef Shape<Tile_::kD, Tile_::kH, Tile_::kW / kAccessSize_, kAccessSize_> cutlass::ReshapeTile< Tile_, kAccessSize_, true >::Tile
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Shape-members.html b/docs/generated-html/structcutlass_1_1Shape-members.html new file mode 100644 index 00000000..629b6d0d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Shape-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Shape< kD_, kH_, kW_, kC_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1Shape.html b/docs/generated-html/structcutlass_1_1Shape.html new file mode 100644 index 00000000..e0d5b53f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Shape.html @@ -0,0 +1,211 @@ + + + + + + + +Cutlass: cutlass::Shape< kD_, kH_, kW_, kC_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Shape< kD_, kH_, kW_, kC_ > Struct Template Reference
    +
    +
    + +

    A Shape implementing Layout Concept describing the dimensions of a cube. +

    + +

    #include <shape.h>

    + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kD = kD_
     The depth of the cube. More...
     
    static int const kH = kH_
     The height of the cube. More...
     
    static int const kW = kW_
     The width of the cube. More...
     
    static int const kC = kC_
     The number of scalars per element. More...
     
    +

    Member Data Documentation

    + +

    ◆ kC

    + +
    +
    +
    +template<int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
    + + + + + +
    + + + + +
    int const cutlass::Shape< kD_, kH_, kW_, kC_ >::kC = kC_
    +
    +static
    +
    + +
    +
    + +

    ◆ kD

    + +
    +
    +
    +template<int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
    + + + + + +
    + + + + +
    int const cutlass::Shape< kD_, kH_, kW_, kC_ >::kD = kD_
    +
    +static
    +
    + +
    +
    + +

    ◆ kH

    + +
    +
    +
    +template<int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
    + + + + + +
    + + + + +
    int const cutlass::Shape< kD_, kH_, kW_, kC_ >::kH = kH_
    +
    +static
    +
    + +
    +
    + +

    ◆ kW

    + +
    +
    +
    +template<int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
    + + + + + +
    + + + + +
    int const cutlass::Shape< kD_, kH_, kW_, kC_ >::kW = kW_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeAdd-members.html b/docs/generated-html/structcutlass_1_1ShapeAdd-members.html new file mode 100644 index 00000000..a7c0d6c4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeAdd-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeAdd< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeAdd< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeAdd< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeAdd.html b/docs/generated-html/structcutlass_1_1ShapeAdd.html new file mode 100644 index 00000000..438ecfcf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeAdd.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeAdd< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeAdd< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< A_::kD+B_::kD, A_::kH+B_::kH, A_::kW+B_::kW, A_::kC+B_::kC > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_ , typename B_ >
    + + + + +
    typedef Shape<A_::kD + B_::kD, A_::kH + B_::kH, A_::kW + B_::kW, A_::kC + B_::kC> cutlass::ShapeAdd< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeCount-members.html b/docs/generated-html/structcutlass_1_1ShapeCount-members.html new file mode 100644 index 00000000..18ff7b21 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeCount-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeCount< Shape > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeCount.html b/docs/generated-html/structcutlass_1_1ShapeCount.html new file mode 100644 index 00000000..52f180e3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeCount.html @@ -0,0 +1,265 @@ + + + + + + + +Cutlass: cutlass::ShapeCount< Shape > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeCount< Shape > Struct Template Reference
    +
    +
    + +

    Compute derived counts of a Layout Concept based class. +

    + +

    #include <shape.h>

    + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kWc = Shape::kW * Shape::kC
     The number of elements per row. More...
     
    static int const kHw = Shape::kH * Shape::kW
     The number of pixels per image. More...
     
    static int const kHwc = Shape::kH * kWc
     The number of elements per image. More...
     
    static int const kDhw = Shape::kD * kHw
     The number of pixels per cube. More...
     
    static int const kDhwc = Shape::kD * kHwc
     The number of elements in the 4D space. More...
     
    static int const kCount = kDhwc
     The number of elements in the 4D space. More...
     
    +

    Member Data Documentation

    + +

    ◆ kCount

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kCount = kDhwc
    +
    +static
    +
    + +
    +
    + +

    ◆ kDhw

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kDhw = Shape::kD * kHw
    +
    +static
    +
    + +
    +
    + +

    ◆ kDhwc

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kDhwc = Shape::kD * kHwc
    +
    +static
    +
    + +
    +
    + +

    ◆ kHw

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kHw = Shape::kH * Shape::kW
    +
    +static
    +
    + +
    +
    + +

    ◆ kHwc

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kHwc = Shape::kH * kWc
    +
    +static
    +
    + +
    +
    + +

    ◆ kWc

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kWc = Shape::kW * Shape::kC
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeDiv-members.html b/docs/generated-html/structcutlass_1_1ShapeDiv-members.html new file mode 100644 index 00000000..45aa6329 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeDiv-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeDiv< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeDiv< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeDiv< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeDiv.html b/docs/generated-html/structcutlass_1_1ShapeDiv.html new file mode 100644 index 00000000..46eb5608 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeDiv.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeDiv< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeDiv< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< A_::kD/B_::kD, A_::kH/B_::kH, A_::kW/B_::kW, A_::kC/B_::kC > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_, typename B_>
    + + + + +
    typedef Shape<A_::kD / B_::kD, A_::kH / B_::kH, A_::kW / B_::kW, A_::kC / B_::kC> cutlass::ShapeDiv< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMax-members.html b/docs/generated-html/structcutlass_1_1ShapeMax-members.html new file mode 100644 index 00000000..6c7119c7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMax-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeMax< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeMax< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeMax< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMax.html b/docs/generated-html/structcutlass_1_1ShapeMax.html new file mode 100644 index 00000000..f2f81b90 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMax.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeMax< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeMax< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape<(A_::kD > B_::kD ? A_::kD :B_::kD),(A_::kH > B_::kH ? A_::kH :B_::kH),(A_::kW > B_::kW ? A_::kW :B_::kW),(A_::kC > B_::kC ? A_::kC :B_::kC)> Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_ , typename B_ >
    + + + + +
    typedef Shape<(A_::kD > B_::kD ? A_::kD : B_::kD), (A_::kH > B_::kH ? A_::kH : B_::kH), (A_::kW > B_::kW ? A_::kW : B_::kW), (A_::kC > B_::kC ? A_::kC : B_::kC)> cutlass::ShapeMax< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMin-members.html b/docs/generated-html/structcutlass_1_1ShapeMin-members.html new file mode 100644 index 00000000..ce2f15bb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMin-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeMin< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeMin< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeMin< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMin.html b/docs/generated-html/structcutlass_1_1ShapeMin.html new file mode 100644 index 00000000..8cd3b4cf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMin.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeMin< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeMin< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape<(A_::kD< B_::kD ? A_::kD :B_::kD),(A_::kH< B_::kH ? A_::kH :B_::kH),(A_::kW< B_::kW ? A_::kW :B_::kW),(A_::kC< B_::kC ? A_::kC :B_::kC)> Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_ , typename B_ >
    + + + + +
    typedef Shape<(A_::kD < B_::kD ? A_::kD : B_::kD), (A_::kH < B_::kH ? A_::kH : B_::kH), (A_::kW < B_::kW ? A_::kW : B_::kW), (A_::kC < B_::kC ? A_::kC : B_::kC)> cutlass::ShapeMin< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMul-members.html b/docs/generated-html/structcutlass_1_1ShapeMul-members.html new file mode 100644 index 00000000..75cf214c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMul-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeMul< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeMul< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeMul< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMul.html b/docs/generated-html/structcutlass_1_1ShapeMul.html new file mode 100644 index 00000000..93d28b06 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMul.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeMul< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeMul< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_, typename B_>
    + + + + +
    typedef Shape<A_::kD * B_::kD, A_::kH * B_::kH, A_::kW * B_::kW, A_::kC * B_::kC> cutlass::ShapeMul< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeScale-members.html b/docs/generated-html/structcutlass_1_1ShapeScale-members.html new file mode 100644 index 00000000..1ba06a84 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeScale-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeScale< A_, kScale_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeScale< A_, kScale_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeScale< A_, kScale_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeScale.html b/docs/generated-html/structcutlass_1_1ShapeScale.html new file mode 100644 index 00000000..5056a89d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeScale.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeScale< A_, kScale_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeScale< A_, kScale_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< A_::kD *kScale_, A_::kH *kScale_, A_::kW *kScale_, A_::kC *kScale_ > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_ , int kScale_>
    + + + + +
    typedef Shape<A_::kD * kScale_, A_::kH * kScale_, A_::kW * kScale_, A_::kC * kScale_> cutlass::ShapeScale< A_, kScale_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeStrides-members.html b/docs/generated-html/structcutlass_1_1ShapeStrides-members.html new file mode 100644 index 00000000..0ca76c50 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeStrides-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeStrides< Shape_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeStrides< Shape_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeStrides< Shape_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeStrides.html b/docs/generated-html/structcutlass_1_1ShapeStrides.html new file mode 100644 index 00000000..4328a352 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeStrides.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeStrides< Shape_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeStrides< Shape_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< Shape_::kH *Shape_::kW *Shape_::kC, Shape_::kW *Shape_::kC, Shape_::kC, 1 > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename Shape_>
    + + + + +
    typedef Shape<Shape_::kH * Shape_::kW * Shape_::kC, Shape_::kW * Shape_::kC, Shape_::kC, 1> cutlass::ShapeStrides< Shape_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeSub-members.html b/docs/generated-html/structcutlass_1_1ShapeSub-members.html new file mode 100644 index 00000000..666b9cf0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeSub-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeSub< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeSub< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeSub< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeSub.html b/docs/generated-html/structcutlass_1_1ShapeSub.html new file mode 100644 index 00000000..81064ec1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeSub.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeSub< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeSub< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< A_::kD - B_::kD, A_::kH - B_::kH, A_::kW - B_::kW, A_::kC - B_::kC > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_ , typename B_ >
    + + + + +
    typedef Shape<A_::kD - B_::kD, A_::kH - B_::kH, A_::kW - B_::kW, A_::kC - B_::kC> cutlass::ShapeSub< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType-members.html b/docs/generated-html/structcutlass_1_1StorageType-members.html new file mode 100644 index 00000000..69e4ddfd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::StorageType< kAlignment_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::StorageType< kAlignment_ >, including all inherited members.

    + + +
    Type typedefcutlass::StorageType< kAlignment_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType.html b/docs/generated-html/structcutlass_1_1StorageType.html new file mode 100644 index 00000000..9205f95c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::StorageType< kAlignment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::StorageType< kAlignment_ > Struct Template Reference
    +
    +
    + +

    #include <fragment.h>

    + + + + +

    +Public Types

    typedef uint64_t Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    +
    +template<int kAlignment_>
    + + + + +
    typedef uint64_t cutlass::StorageType< kAlignment_ >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4-members.html b/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4-members.html new file mode 100644 index 00000000..401d5fc0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::StorageType< 1 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::StorageType< 1 >, including all inherited members.

    + + +
    Type typedefcutlass::StorageType< 1 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4.html b/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4.html new file mode 100644 index 00000000..ceab6b03 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: cutlass::StorageType< 1 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::StorageType< 1 > Struct Template Reference
    +
    +
    + +

    #include <fragment.h>

    + + + + +

    +Public Types

    typedef uint8_t Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    + + + + +
    typedef uint8_t cutlass::StorageType< 1 >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4-members.html b/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4-members.html new file mode 100644 index 00000000..ac8127c9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::StorageType< 2 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::StorageType< 2 >, including all inherited members.

    + + +
    Type typedefcutlass::StorageType< 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4.html b/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4.html new file mode 100644 index 00000000..84648725 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: cutlass::StorageType< 2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::StorageType< 2 > Struct Template Reference
    +
    +
    + +

    #include <fragment.h>

    + + + + +

    +Public Types

    typedef uint16_t Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    + + + + +
    typedef uint16_t cutlass::StorageType< 2 >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4-members.html b/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4-members.html new file mode 100644 index 00000000..bf78873c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::StorageType< 4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::StorageType< 4 >, including all inherited members.

    + + +
    Type typedefcutlass::StorageType< 4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4.html b/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4.html new file mode 100644 index 00000000..74751e57 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: cutlass::StorageType< 4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::StorageType< 4 > Struct Template Reference
    +
    +
    + +

    #include <fragment.h>

    + + + + +

    +Public Types

    typedef uint32_t Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    + + + + +
    typedef uint32_t cutlass::StorageType< 4 >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store-members.html b/docs/generated-html/structcutlass_1_1Store-members.html new file mode 100644 index 00000000..f942adf6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >
    store(AccessType const &src, Scalar_ *pointer, int offset)cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store.html b/docs/generated-html/structcutlass_1_1Store.html new file mode 100644 index 00000000..057010d1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &src, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_, bool = (Lanes_ > 1), size_t = (sizeof(Scalar_) * Lanes_)>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_, bool = (Lanes_ > 1), size_t = (sizeof(Scalar_) * Lanes_)>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >::store (AccessType const & src,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html new file mode 100644 index 00000000..c8f01cce --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >
    store(AccessType const &src, Scalar_ *pointer, int offset)cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html new file mode 100644 index 00000000..69ed5486 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &src, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >::store (AccessType const & src,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html new file mode 100644 index 00000000..ad692d4f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >
    store(AccessType const &src, Scalar_ *pointer, int offset)cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html new file mode 100644 index 00000000..443c824a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &src, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::store (AccessType const & src,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html new file mode 100644 index 00000000..8f54fe41 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >
    store(AccessType const &src, Scalar_ *pointer, int offset)cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html new file mode 100644 index 00000000..5f7e301e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &src, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >::store (AccessType const & src,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html b/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html new file mode 100644 index 00000000..7d13d6bb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Store< double, 2, Memory_, true, 16 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Store< double, 2, Memory_, true, 16 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Store< double, 2, Memory_, true, 16 >
    store(AccessType const &src, double *pointer, int offset)cutlass::Store< double, 2, Memory_, true, 16 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html b/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html new file mode 100644 index 00000000..71cc2ab9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Store< double, 2, Memory_, true, 16 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Store< double, 2, Memory_, true, 16 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< double, 2 >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &src, double *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<double, 2>::Type cutlass::Store< double, 2, Memory_, true, 16 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Store< double, 2, Memory_, true, 16 >::store (AccessType const & src,
    double * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase-members.html b/docs/generated-html/structcutlass_1_1TileIteratorBase-members.html new file mode 100644 index 00000000..a313a511 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase-members.html @@ -0,0 +1,114 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Delta typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Fragment typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentConstIterator typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentElement typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentIterator typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentShape typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ImmediateOffsetStrides typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Index typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inlinestatic
    Iterations typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    kAccessSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kAdvancecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kFragmentSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kIteratorFragmentcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kMemorySpacecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    PredicateVector typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Scalar typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Skew typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Storage typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ThreadOffset typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Tile typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Traits typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    valid(int d, int h, int w, int c) constcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase.html b/docs/generated-html/structcutlass_1_1TileIteratorBase.html new file mode 100644 index 00000000..a9469149 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase.html @@ -0,0 +1,695 @@ + + + + + + + +Cutlass: cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference
    +
    +
    + +

    Iterator for accessing a stripmined tile in memory. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >:
    +
    +
    + + +cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > +cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > + +
    + + + + + +

    +Classes

    struct  Params
     Parameters to the iterator. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Traits_ Traits
     concept TileTraits More...
     
    typedef Scalar_ Scalar
     Scalar element. More...
     
    typedef FragmentElement_ FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Skew_ Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/store by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSizeStorage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSizeFragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessTypeFragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessTypeFragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + +

    +Public Member Functions

    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + +

    +Static Public Member Functions

    template<typename PredicateIterator >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static IteratorAdvance::Kind const kAdvance = Advance_
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment = IteratorFragment_
     Specifies iterator storage fragment type (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize = Tile::kC
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Vectorize<FragmentElement, kAccessSize>::Type cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::AccessType
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits::Delta cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Delta
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Fragment<FragmentElement, ShapeCount<Iterations>::kCount * kAccessSize> cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentConstIterator

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef FragmentConstIterator<Fragment, Iterations, AccessType> cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentConstIterator
    +
    + +
    +
    + +

    ◆ FragmentElement

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef FragmentElement_ cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentElement
    +
    + +
    +
    + +

    ◆ FragmentIterator

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef FragmentIterator<Fragment, Iterations, AccessType> cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentIterator
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef FragmentIterator::FragmentShape cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits::ImmediateOffsetStrides cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Index_ cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Index
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits::Iterations cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Iterations
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef PredicateVector<ShapeCount<Iterations>::kCount> cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::PredicateVector
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Scalar_ cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Scalar
    +
    + +
    +
    + +

    ◆ Skew

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Skew_ cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Skew
    +
    + +
    +
    + +

    ◆ Storage

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Fragment<Scalar, ShapeCount<Tile>::kCount, kFragmentSize> cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Storage
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits::ThreadOffset cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits::Tile cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Tile
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits_ cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Traits
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::initialize_predicates (PredicateIterator predicate_it,
    Coord< 3 > const & bounds,
    Coord< 3 > const & offset = make_Coord(0, 0, 0) 
    )
    +
    +inlinestatic
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    int const cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kAccessSize = Tile::kC
    +
    +static
    +
    + +
    +
    + +

    ◆ kAdvance

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorAdvance::Kind const cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kAdvance = Advance_
    +
    +static
    +
    + +
    +
    + +

    ◆ kFragmentSize

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    int const cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kFragmentSize
    +
    +static
    +
    +Initial value: +
    +
    + +

    ◆ kIteratorFragment

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorFragment::Kind const cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kIteratorFragment = IteratorFragment_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kMemorySpace = MemorySpace
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase.png b/docs/generated-html/structcutlass_1_1TileIteratorBase.png new file mode 100644 index 00000000..ce0eacc9 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1TileIteratorBase.png differ diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage-members.html b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage-members.html new file mode 100644 index 00000000..f752133c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage.html b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage.html new file mode 100644 index 00000000..8bd1a05d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage.html @@ -0,0 +1,283 @@ + + + + + + + +Cutlass: cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage Struct Reference
    +
    +
    + +

    Storage object. +

    + +

    #include <tile_iterator.h>

    + + + + + + + + + + + +

    +Public Types

    typedef Scalar Scalar
     Underlying scalar type. More...
     
    typedef ShapeAdd< Tile, Skew >::Shape Allocation
     Shape of allocation. More...
     
    typedef Tile Shape
     Shape of array. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Scalardata ()
     Returns a raw pointer. More...
     
    CUTLASS_HOST_DEVICE Scalar const * data () const
     Returns a raw pointer. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_HOST_DEVICE Index leading_dim ()
     Returns the leading dimension. More...
     
    + + + + +

    +Public Attributes

    Scalar scalars [Allocation::kD][Allocation::kH][Allocation::kW][Allocation::kC]
     Data storage. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Allocation

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef ShapeAdd<Tile, Skew>::Shape cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::Allocation
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Scalar cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::Scalar
    +
    + +
    +
    + +

    ◆ Shape

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Tile cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::Shape
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data() [1/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Scalar* cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::data ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ data() [2/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Scalar const* cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ leading_dim()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    static CUTLASS_HOST_DEVICE Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::leading_dim ()
    +
    +inlinestatic
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ scalars

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Scalar cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::scalars[Allocation::kD][Allocation::kH][Allocation::kW][Allocation::kC]
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params-members.html b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params-members.html new file mode 100644 index 00000000..3acf4206 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params-members.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params, including all inherited members.

    + + + + + + + + + + + +
    inc_advancecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    initialize(Index _stride_d, Index _stride_h, Index _stride_w)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    initialize()cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    stride_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.html b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.html new file mode 100644 index 00000000..be921381 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.html @@ -0,0 +1,391 @@ + + + + + + + +Cutlass: cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference
    +
    +
    + +

    Parameters to the iterator. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params:
    +
    +
    + + +cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params +cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params +cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params + +
    + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w)
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    + + + + + + + + + + + + + + + +

    +Public Attributes

    Index stride_d
     
    Index stride_h
     
    Index stride_w
     
    Index inc_d
     
    Index inc_h
     
    Index inc_w
     
    Index inc_advance
     
    +

    Member Function Documentation

    + +

    ◆ initialize() [1/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Index _stride_d,
    Index _stride_h,
    Index _stride_w,
    Index _inc_d,
    Index _inc_h,
    Index _inc_w,
    Index _inc_advance 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [2/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Index _stride_d,
    Index _stride_h,
    Index _stride_w 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [3/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ inc_advance

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::inc_advance
    +
    + +
    +
    + +

    ◆ inc_d

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::inc_d
    +
    + +
    +
    + +

    ◆ inc_h

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::inc_h
    +
    + +
    +
    + +

    ◆ inc_w

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::inc_w
    +
    + +
    +
    + +

    ◆ stride_d

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::stride_d
    +
    + +
    +
    + +

    ◆ stride_h

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::stride_h
    +
    + +
    +
    + +

    ◆ stride_w

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::stride_w
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.png b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.png new file mode 100644 index 00000000..f1c87463 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.png differ diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator-members.html b/docs/generated-html/structcutlass_1_1TileLoadIterator-members.html new file mode 100644 index 00000000..6acaea33 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileLoadIterator-members.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Base typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    BaseParams typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    data() constcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Delta typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Fragment typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentConstIterator typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentElement typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentIterator typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentShape typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ImmediateOffsetStrides typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    inc_advance()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_d()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_h()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_stage()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_w()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Index typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Iterations typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    kAccessSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kAdvancecutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kFragmentSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kIteratorFragmentcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kMemorySpacecutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kRequiresLoadFence enum valuecutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    load(Fragment &fragment, PredicateIterator pred_it) constcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    load(Fragment &fragment) constcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    load_post_increment(Fragment &fragment, PredicateIterator pred_it)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    load_post_increment(Fragment &fragment)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    paramscutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Pointer typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    PredicateVector typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Scalar typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    SharedStorage typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Skew typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    stagecutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Storage typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    thread_offsetcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ThreadOffset typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Tile typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    TileLoadIterator()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    TileLoadIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    TileLoadIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Traits typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    valid(int d, int h, int w, int c) constcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator.html b/docs/generated-html/structcutlass_1_1TileLoadIterator.html new file mode 100644 index 00000000..d670b93f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileLoadIterator.html @@ -0,0 +1,1253 @@ + + + + + + + +Cutlass: cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference
    +
    +
    + +

    An iterator implementing Tile Load Iterator Concept for loading a tile from memory. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >:
    +
    +
    + + +cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > + +
    + + + + + +

    +Classes

    struct  Params
     Parameters. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    enum  { kRequiresLoadFence = Tile::kD == 1 + }
     Do we require a fence? More...
     
    typedef TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Base
     Base class. More...
     
    typedef Base::Traits Traits
     concept TileTraits More...
     
    typedef Base::Scalar Scalar
     Scalar element. More...
     
    typedef Base::FragmentElement FragmentElement
     Fragment element. More...
     
    typedef Base::Index Index
     Index type. More...
     
    typedef Base::Skew Skew
     Skew quantity. More...
     
    typedef Base::Tile Tile
     Tile shape. More...
     
    typedef Base::Delta Delta
     Delta. More...
     
    typedef Base::Iterations Iterations
     Iterations. More...
     
    typedef Base::ThreadOffset ThreadOffset
     ThreadOffset functor. More...
     
    typedef Base::FragmentShape FragmentShape
     Fragment type. More...
     
    typedef Base::AccessType AccessType
     Memory access type. More...
     
    typedef Base::Fragment Fragment
     Fragment definition. More...
     
    typedef Base::FragmentIterator FragmentIterator
     Fragment iterator definition. More...
     
    typedef Base::FragmentConstIterator FragmentConstIterator
     Fragment const iterator definition. More...
     
    typedef Base::PredicateVector PredicateVector
     Default predicate mask type. More...
     
    typedef Base::Storage SharedStorage
     Storage object that may be loaded from. More...
     
    typedef Base::Params BaseParams
     IteratorBase parameters. More...
     
    typedef Scalar const * Pointer
     The pointer type. More...
     
    - Public Types inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    typedef Traits_ Traits
     concept TileTraits More...
     
    typedef Scalar_ Scalar
     Scalar element. More...
     
    typedef FragmentElement_ FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Skew_ Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/store by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    template<typename PredicateIterator >
    CUTLASS_HOST_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator ()
     Default constructor. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator (Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile load iterator. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator (Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile load iterator. More...
     
    CUTLASS_HOST_DEVICE Scalar const * data () const
     Returns the current pointer. More...
     
    CUTLASS_HOST_DEVICE void inc_d ()
     Increment in the D dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_h ()
     Increment in the H dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_w ()
     Increment in the W dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_advance ()
     Increment in the next dimension. More...
     
    CUTLASS_DEVICE void inc_stage ()
     Increment the stage. More...
     
    template<typename Fragment , typename PredicateIterator >
    CUTLASS_HOST_DEVICE void load_post_increment (Fragment &fragment, PredicateIterator pred_it)
     Loads a fragment and advances the iterator to the next tile. More...
     
    template<typename Fragment >
    CUTLASS_HOST_DEVICE void load_post_increment (Fragment &fragment)
     Loads a fragment and advances the iterator to the next tile. More...
     
    template<typename Fragment , typename PredicateIterator >
    CUTLASS_HOST_DEVICE void load (Fragment &fragment, PredicateIterator pred_it) const
     Loads a fragment without advancing the iterator. More...
     
    template<typename Fragment >
    CUTLASS_HOST_DEVICE void load (Fragment &fragment) const
     Loads a fragment without advancing the iterator. More...
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + + + + + + +

    +Public Attributes

    Params params
     Parameters structure. More...
     
    Coord< 4 > thread_offset
     Offset of an individual lane from the start of the tile. More...
     
    int stage
     Stage argument enables wrapping after some number of tiles have been loaded. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static IteratorAdvance::Kind const kAdvance = Base::kAdvance
     Specifies in which dimension post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment = Base::kIteratorFragment
     Specifies type of iterator fragment storage (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace = Base::kMemorySpace
     Source or destination memory space. More...
     
    - Static Public Attributes inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    static IteratorAdvance::Kind const kAdvance = Advance_
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment = IteratorFragment_
     Specifies iterator storage fragment type (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize = Tile::kC
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    + + + + + + +

    +Additional Inherited Members

    - Static Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    template<typename PredicateIterator >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::AccessType cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::AccessType
    +
    + +
    +
    + +

    ◆ Base

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef TileIteratorBase<Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_> cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Base
    +
    + +
    +
    + +

    ◆ BaseParams

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Params cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseParams
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Delta cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Delta
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Fragment cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentConstIterator

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentConstIterator cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentConstIterator
    +
    + +
    +
    + +

    ◆ FragmentElement

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentElement cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentElement
    +
    + +
    +
    + +

    ◆ FragmentIterator

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentIterator cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentIterator
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentShape cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Index cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Index
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Iterations cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Scalar const* cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Pointer
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::PredicateVector cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::PredicateVector
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Scalar cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Storage cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ Skew

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Skew cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Skew
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::ThreadOffset cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Tile cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Tile
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Traits cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Traits
    +
    + +
    +
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    kRequiresLoadFence 
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ TileLoadIterator() [1/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileLoadIterator ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ TileLoadIterator() [2/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileLoadIterator (Params const & _params,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0),
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ TileLoadIterator() [3/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileLoadIterator (Params const & ,
    SharedStorage & shared_storage,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0),
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Scalar const* cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_advance()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_advance ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_d()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_d ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_h()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_h ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_stage()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_stage ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_w()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_w ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::initialize_predicates (PredicateIterator predicate_it,
    Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset = make_Coord(0,                                                                                           0,                                                                                           0) 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ load() [1/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment , typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::load (Fragment & fragment,
    PredicateIterator pred_it 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ load() [2/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::load (Fragment & fragment) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ load_post_increment() [1/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment , typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::load_post_increment (Fragment & fragment,
    PredicateIterator pred_it 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ load_post_increment() [2/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::load_post_increment (Fragmentfragment)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAdvance

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorAdvance::Kind const cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kAdvance = Base::kAdvance
    +
    +static
    +
    + +
    +
    + +

    ◆ kIteratorFragment

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorFragment::Kind const cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kIteratorFragment = Base::kIteratorFragment
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kMemorySpace = Base::kMemorySpace
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Params cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::params
    +
    + +
    +
    + +

    ◆ stage

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    int cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::stage
    +
    + +
    +
    + +

    ◆ thread_offset

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Coord<4> cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::thread_offset
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator.png b/docs/generated-html/structcutlass_1_1TileLoadIterator.png new file mode 100644 index 00000000..30866fa8 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1TileLoadIterator.png differ diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params-members.html b/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params-members.html new file mode 100644 index 00000000..1977795e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params, including all inherited members.

    + + + + + + + + + + + + + + + +
    inc_advancecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    initialize(SharedStorage const &storage)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    initialize(Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    initialize()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    pointercutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.html b/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.html new file mode 100644 index 00000000..b25879f3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.html @@ -0,0 +1,350 @@ + + + + + + + +Cutlass: cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference
    +
    +
    + +

    Parameters. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params:
    +
    +
    + + +cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params +cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params + +
    + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (SharedStorage const &storage)
     Initialize params to access storage object. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)
     Initializes params to access a raw pointer. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w)
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Scalar const * pointer
     Pointer to memory. More...
     
    - Public Attributes inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    Index stride_d
     
    Index stride_h
     
    Index stride_w
     
    Index inc_d
     
    Index inc_h
     
    Index inc_w
     
    Index inc_advance
     
    +

    Member Function Documentation

    + +

    ◆ initialize() [1/4]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (SharedStorage const & storage)
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [2/4]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Scalar const * ptr,
    Index stride_d,
    Index stride_h,
    Index stride_w 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [3/4]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Scalar const * ptr,
    Index _stride_d,
    Index _stride_h,
    Index _stride_w,
    Index _inc_d,
    Index _inc_h,
    Index _inc_w,
    Index _inc_advance 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [4/4]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ pointer

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Scalar const* cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::pointer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.png b/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.png new file mode 100644 index 00000000..99933891 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.png differ diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator-members.html b/docs/generated-html/structcutlass_1_1TileStoreIterator-members.html new file mode 100644 index 00000000..f24d2dcd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileStoreIterator-members.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Base typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    BaseParams typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    data() constcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Delta typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Fragment typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentConstIterator typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentElement typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentIterator typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentShape typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ImmediateOffsetStrides typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    inc_advance()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_d()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_h()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_stage()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_w()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Index typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Iterations typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    kAccessSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kAdvancecutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kFragmentSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kIteratorFragmentcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kMemorySpacecutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    paramscutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    PredicateVector typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Scalar typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    SharedStorage typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Skew typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    stagecutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Storage typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    store(Fragment &fragment, PredicateIterator pred_it) constcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    store(Fragment &fragment) constcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    store_post_increment(Fragment &fragment, PredicateIterator pred_it)cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    store_post_increment(Fragment &fragment)cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    thread_offsetcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ThreadOffset typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Tile typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    TileStoreIterator()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    TileStoreIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    TileStoreIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Traits typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    valid(int d, int h, int w, int c) constcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator.html b/docs/generated-html/structcutlass_1_1TileStoreIterator.html new file mode 100644 index 00000000..4fe6f216 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileStoreIterator.html @@ -0,0 +1,1210 @@ + + + + + + + +Cutlass: cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference
    +
    +
    + +

    An iterator implementing Tile Store Iterator Concept for storing a tile to memory. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >:
    +
    +
    + + +cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > + +
    + + + + + +

    +Classes

    struct  Params
     Parameters. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Base
     Base class. More...
     
    typedef Base::Traits Traits
     concept TileTraits More...
     
    typedef Base::Scalar Scalar
     Scalar element. More...
     
    typedef Base::FragmentElement FragmentElement
     Fragment element. More...
     
    typedef Base::Index Index
     Index type. More...
     
    typedef Base::Skew Skew
     Skew quantity. More...
     
    typedef Base::Tile Tile
     Tile shape. More...
     
    typedef Base::Delta Delta
     Delta. More...
     
    typedef Base::Iterations Iterations
     Iterations. More...
     
    typedef Base::ThreadOffset ThreadOffset
     ThreadOffset functor. More...
     
    typedef Base::FragmentShape FragmentShape
     Fragment type. More...
     
    typedef Base::AccessType AccessType
     Memory access type. More...
     
    typedef Base::Fragment Fragment
     Fragment definition. More...
     
    typedef Base::FragmentIterator FragmentIterator
     Fragment iterator definition. More...
     
    typedef Base::FragmentConstIterator FragmentConstIterator
     Fragment const iterator definition. More...
     
    typedef Base::PredicateVector PredicateVector
     Default predicate mask type. More...
     
    typedef Base::Storage SharedStorage
     Storage object which may be stored to. More...
     
    typedef Base::Params BaseParams
     IteratorBase parameters. More...
     
    - Public Types inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    typedef Traits_ Traits
     concept TileTraits More...
     
    typedef Scalar_ Scalar
     Scalar element. More...
     
    typedef FragmentElement_ FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Skew_ Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/store by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    template<typename PredicateIterator >
    CUTLASS_HOST_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    CUTLASS_HOST_DEVICE TileStoreIterator ()
     Default constructor. More...
     
    CUTLASS_HOST_DEVICE TileStoreIterator (Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile store iterator. More...
     
    CUTLASS_HOST_DEVICE TileStoreIterator (Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile store iterator. More...
     
    CUTLASS_HOST_DEVICE Scalar * data () const
     Returns the current pointer. More...
     
    CUTLASS_HOST_DEVICE void inc_d ()
     Increment in the D dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_h ()
     Increment in the H dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_w ()
     Increment in the W dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_advance ()
     Increment in the next dimension. More...
     
    CUTLASS_DEVICE void inc_stage ()
     Increment the stage. More...
     
    template<typename Fragment , typename PredicateIterator >
    CUTLASS_HOST_DEVICE void store_post_increment (Fragment &fragment, PredicateIterator pred_it)
     Stores a fragment and advances to the next tile. More...
     
    template<typename Fragment >
    CUTLASS_HOST_DEVICE void store_post_increment (Fragment &fragment)
     Stores a fragment and advances to the next tile. More...
     
    template<typename Fragment , typename PredicateIterator >
    CUTLASS_HOST_DEVICE void store (Fragment &fragment, PredicateIterator pred_it) const
     Stores a fragment without advancing the iterator. More...
     
    template<typename Fragment >
    CUTLASS_HOST_DEVICE void store (Fragment &fragment) const
     Stores a fragment without advancing the iterator. More...
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + + + + + + +

    +Public Attributes

    Params params
     Parameters structure. More...
     
    Coord< 4 > thread_offset
     Offset of an individual lane from the start of the tile. More...
     
    int stage
     The stage. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static IteratorAdvance::Kind const kAdvance = Base::kAdvance
     Specifies in which dimension post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment = Base::kIteratorFragment
     Specifies type of iterator fragment storage (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace = Base::kMemorySpace
     Source or destination memory space. More...
     
    - Static Public Attributes inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    static IteratorAdvance::Kind const kAdvance = Advance_
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment = IteratorFragment_
     Specifies iterator storage fragment type (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize = Tile::kC
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    + + + + + + +

    +Additional Inherited Members

    - Static Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    template<typename PredicateIterator >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::AccessType cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::AccessType
    +
    + +
    +
    + +

    ◆ Base

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef TileIteratorBase<Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_> cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Base
    +
    + +
    +
    + +

    ◆ BaseParams

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Params cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseParams
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Delta cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Delta
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Fragment cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentConstIterator

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentConstIterator cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentConstIterator
    +
    + +
    +
    + +

    ◆ FragmentElement

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentElement cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentElement
    +
    + +
    +
    + +

    ◆ FragmentIterator

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentIterator cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentIterator
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentShape cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Index cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Index
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Iterations cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Iterations
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::PredicateVector cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::PredicateVector
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Scalar cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Storage cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ Skew

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Skew cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Skew
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::ThreadOffset cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Tile cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Tile
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Traits cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ TileStoreIterator() [1/3]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileStoreIterator ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ TileStoreIterator() [2/3]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileStoreIterator (Params const & _params,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0),
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ TileStoreIterator() [3/3]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileStoreIterator (Params const & ,
    SharedStorage & shared_storage,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0),
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Scalar* cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_advance()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_advance ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_d()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_d ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_h()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_h ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_stage()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_stage ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_w()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_w ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::initialize_predicates (PredicateIterator predicate_it,
    Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0) 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ store() [1/2]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment , typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::store (Fragment & fragment,
    PredicateIterator pred_it 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ store() [2/2]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::store (Fragment & fragment) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ store_post_increment() [1/2]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment , typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::store_post_increment (Fragment & fragment,
    PredicateIterator pred_it 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ store_post_increment() [2/2]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::store_post_increment (Fragment & fragment)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAdvance

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorAdvance::Kind const cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kAdvance = Base::kAdvance
    +
    +static
    +
    + +
    +
    + +

    ◆ kIteratorFragment

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorFragment::Kind const cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kIteratorFragment = Base::kIteratorFragment
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kMemorySpace = Base::kMemorySpace
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Params cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::params
    +
    + +
    +
    + +

    ◆ stage

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    int cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::stage
    +
    + +
    +
    + +

    ◆ thread_offset

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Coord<4> cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::thread_offset
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator.png b/docs/generated-html/structcutlass_1_1TileStoreIterator.png new file mode 100644 index 00000000..a20f18cf Binary files /dev/null and b/docs/generated-html/structcutlass_1_1TileStoreIterator.png differ diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params-members.html b/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params-members.html new file mode 100644 index 00000000..5d34eba8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params, including all inherited members.

    + + + + + + + + + + + + + + + +
    inc_advance cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_d cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_h cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_w cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    initialize(SharedStorage &storage) cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    initialize(Scalar *ptr, Index stride_d, Index stride_h, Index stride_w) cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    initialize(Scalar *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance) cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    initialize() cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance) cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w) cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    pointer cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_d cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_h cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_w cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.html b/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.html new file mode 100644 index 00000000..3da80d41 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.html @@ -0,0 +1,350 @@ + + + + + + + +Cutlass: cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference
    +
    +
    + +

    Parameters. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params:
    +
    +
    + + +cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params + +
    + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (SharedStorage &storage)
     Initialize params to access storage object. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar *ptr, Index stride_d, Index stride_h, Index stride_w)
     Initializes params to access a raw pointer. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize ()
     Initializes params to default values. More...
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w)
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Scalar * pointer
     Pointer to memory. More...
     
    - Public Attributes inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    Index stride_d
     
    Index stride_h
     
    Index stride_w
     
    Index inc_d
     
    Index inc_h
     
    Index inc_w
     
    Index inc_advance
     
    +

    Member Function Documentation

    + +

    ◆ initialize() [1/4]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (SharedStorage & storage)
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [2/4]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Scalar * ptr,
    Index stride_d,
    Index stride_h,
    Index stride_w 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [3/4]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Scalar * ptr,
    Index _stride_d,
    Index _stride_h,
    Index _stride_w,
    Index _inc_d,
    Index _inc_h,
    Index _inc_w,
    Index _inc_advance 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [4/4]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ pointer

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Scalar* cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::pointer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.png b/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.png new file mode 100644 index 00000000..aabb9a31 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.png differ diff --git a/docs/generated-html/structcutlass_1_1TileTraits-members.html b/docs/generated-html/structcutlass_1_1TileTraits-members.html new file mode 100644 index 00000000..c4d8ddf5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraits-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraits.html b/docs/generated-html/structcutlass_1_1TileTraits.html new file mode 100644 index 00000000..b81a5194 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraits.html @@ -0,0 +1,179 @@ + + + + + + + +Cutlass: cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ > Struct Template Reference
    +
    +
    + +

    A template defining Tile Traits Concept. +

    + +

    #include <tile_iterator.h>

    + + + + + + + + + + + + + + +

    +Public Types

    typedef Tile_ Tile
     Shape of the tile. More...
     
    typedef Delta_ Delta
     Number of steps between accesses along each dimension. More...
     
    typedef Iterations_ Iterations
     Number of accesses performed. More...
     
    typedef ThreadOffset_ ThreadOffset
     Functor that returns the logical coordinate of each entity's initial offset in the tile. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Tile_ , typename Delta_ , typename Iterations_ , typename ThreadOffset_ >
    + + + + +
    typedef Delta_ cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Tile_ , typename Delta_ , typename Iterations_ , typename ThreadOffset_ >
    + + + + +
    typedef Iterations_ cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >::Iterations
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Tile_ , typename Delta_ , typename Iterations_ , typename ThreadOffset_ >
    + + + + +
    typedef ThreadOffset_ cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , typename Delta_ , typename Iterations_ , typename ThreadOffset_ >
    + + + + +
    typedef Tile_ cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >::Tile
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor-members.html b/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor-members.html new file mode 100644 index 00000000..e33d5655 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraitsContiguousMajor< Tile_, Threads > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor.html b/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor.html new file mode 100644 index 00000000..777f6136 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor.html @@ -0,0 +1,232 @@ + + + + + + + +Cutlass: cutlass::TileTraitsContiguousMajor< Tile_, Threads > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraitsContiguousMajor< Tile_, Threads > Struct Template Reference
    +
    +
    + +

    #include <tile_traits_standard.h>

    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Tile_ Tile
     Shape of tile. More...
     
    typedef Shape< 1, 1, kThreads > ThreadShape
     Thread shape. More...
     
    typedef Shape< 1, 1, kThreads > Delta
     Delta between each thread's access. More...
     
    typedef Shape< 1, Tile::kH, Tile::kW/kThreads > Iterations
     Number of iterations. More...
     
    typedef TiledThreadOffset< ThreadShape > ThreadOffset
     Computes the initial offset. More...
     
    + + + + +

    +Static Public Attributes

    static int const kThreads = Threads
     Number of participating threads. More...
     
    +

    Detailed Description

    +

    template<typename Tile_, int Threads>
    +struct cutlass::TileTraitsContiguousMajor< Tile_, Threads >

    + +

    Tiling in which the number of threads is fewer than the tile size in the contiguous dimension.

    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, 1, kThreads> cutlass::TileTraitsContiguousMajor< Tile_, Threads >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, Tile::kH, Tile::kW / kThreads> cutlass::TileTraitsContiguousMajor< Tile_, Threads >::Iterations
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef TiledThreadOffset<ThreadShape> cutlass::TileTraitsContiguousMajor< Tile_, Threads >::ThreadOffset
    +
    + +
    +
    + +

    ◆ ThreadShape

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, 1, kThreads> cutlass::TileTraitsContiguousMajor< Tile_, Threads >::ThreadShape
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Tile_ cutlass::TileTraitsContiguousMajor< Tile_, Threads >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsContiguousMajor< Tile_, Threads >::kThreads = Threads
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsStandard-members.html b/docs/generated-html/structcutlass_1_1TileTraitsStandard-members.html new file mode 100644 index 00000000..4732a54b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsStandard-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraitsStandard< Tile_, Threads > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsStandard.html b/docs/generated-html/structcutlass_1_1TileTraitsStandard.html new file mode 100644 index 00000000..7806ece4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsStandard.html @@ -0,0 +1,208 @@ + + + + + + + +Cutlass: cutlass::TileTraitsStandard< Tile_, Threads > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraitsStandard< Tile_, Threads > Struct Template Reference
    +
    +
    + +

    Chooses 'best' shape to enable warp raking along contiguous dimension if possible. +

    + +

    #include <tile_traits_standard.h>

    + + + + + +

    +Public Types

    typedef Tile_ Tile
     Shape of tile. More...
     
    + + + + + + + + + + +

    +Static Public Attributes

    static int const kThreads = Threads
     Number of participating threads. More...
     
    static int const kWarpSize = 32
     Hard-coded warp size. More...
     
    static int const kWarpCount = kThreads / kWarpSize
     Number of participating warps. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Tile_ cutlass::TileTraitsStandard< Tile_, Threads >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsStandard< Tile_, Threads >::kThreads = Threads
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpCount

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsStandard< Tile_, Threads >::kWarpCount = kThreads / kWarpSize
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpSize

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsStandard< Tile_, Threads >::kWarpSize = 32
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor-members.html b/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor-members.html new file mode 100644 index 00000000..2ae51908 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraitsStrideMajor< Tile_, Threads > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor.html b/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor.html new file mode 100644 index 00000000..d24bc59b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor.html @@ -0,0 +1,232 @@ + + + + + + + +Cutlass: cutlass::TileTraitsStrideMajor< Tile_, Threads > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraitsStrideMajor< Tile_, Threads > Struct Template Reference
    +
    +
    + +

    #include <tile_traits_standard.h>

    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Tile_ Tile
     Shape of tile. More...
     
    typedef Shape< 1, kThreads/Tile::kW, Tile::kW, 1 > ThreadShape
     Shape of threads. More...
     
    typedef Shape< 1, ThreadShape::kH, 1, 1 > Delta
     Delta along each dimension. More...
     
    typedef Shape< 1, Tile::kH/ThreadShape::kH, 1, 1 > Iterations
     Number of iterations. More...
     
    typedef TiledThreadOffset< ThreadShape > ThreadOffset
     Computes the initial offset. More...
     
    + + + + +

    +Static Public Attributes

    static int const kThreads = Threads
     Number of participating threads. More...
     
    +

    Detailed Description

    +

    template<typename Tile_, int Threads>
    +struct cutlass::TileTraitsStrideMajor< Tile_, Threads >

    + +

    Tiling in which the number of threads is greater than the contiguous dimension of the tile.

    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, ThreadShape::kH, 1, 1> cutlass::TileTraitsStrideMajor< Tile_, Threads >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, Tile::kH / ThreadShape::kH, 1, 1> cutlass::TileTraitsStrideMajor< Tile_, Threads >::Iterations
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef TiledThreadOffset<ThreadShape> cutlass::TileTraitsStrideMajor< Tile_, Threads >::ThreadOffset
    +
    + +
    +
    + +

    ◆ ThreadShape

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, kThreads / Tile::kW, Tile::kW, 1> cutlass::TileTraitsStrideMajor< Tile_, Threads >::ThreadShape
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Tile_ cutlass::TileTraitsStrideMajor< Tile_, Threads >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsStrideMajor< Tile_, Threads >::kThreads = Threads
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsWarpRake-members.html b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake-members.html new file mode 100644 index 00000000..e76c228b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake-members.html @@ -0,0 +1,99 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraitsWarpRake< Tile_, Threads > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsWarpRake.html b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake.html new file mode 100644 index 00000000..771a8e41 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake.html @@ -0,0 +1,326 @@ + + + + + + + +Cutlass: cutlass::TileTraitsWarpRake< Tile_, Threads > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraitsWarpRake< Tile_, Threads > Struct Template Reference
    +
    +
    + +

    Tiling in which warps rake across the contiguous dimension. +

    + +

    #include <tile_traits_standard.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + +

    +Public Types

    typedef Tile_ Tile
     Shape of tile. More...
     
    typedef Shape< 1, kWarpsStrided, kWarpsContiguous *kWarpSize > ThreadShape
     Arrangement of threads. More...
     
    typedef Shape< 1, kWarpsStrided, kWarpSize > Delta
     The same warp rakes along the contiguous dimension. More...
     
    typedef Shape< 1, Tile::kH/Delta::kH, Tile::kW/ThreadShape::kW > Iterations
     Number of iterations. More...
     
    + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kThreads = Threads
     Number of participating threads. More...
     
    static int const kWarpSize = 32
     Hard-coded warp size. More...
     
    static int const kWarpCount = kThreads / kWarpSize
     Number of participating warps. More...
     
    static int const kWarpsStrided = __NV_STD_MIN(kWarpCount, Tile::kH)
     Warps strip-mined across strided dimension. More...
     
    static int const kWarpsContiguous = kWarpCount / kWarpsStrided
     Warps strip-mined across contiguous dimension. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, kWarpsStrided, kWarpSize> cutlass::TileTraitsWarpRake< Tile_, Threads >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, Tile::kH / Delta::kH, Tile::kW / ThreadShape::kW> cutlass::TileTraitsWarpRake< Tile_, Threads >::Iterations
    +
    + +
    +
    + +

    ◆ ThreadShape

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, kWarpsStrided, kWarpsContiguous * kWarpSize> cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadShape
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Tile_ cutlass::TileTraitsWarpRake< Tile_, Threads >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsWarpRake< Tile_, Threads >::kThreads = Threads
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpCount

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsWarpRake< Tile_, Threads >::kWarpCount = kThreads / kWarpSize
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpsContiguous

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsWarpRake< Tile_, Threads >::kWarpsContiguous = kWarpCount / kWarpsStrided
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpSize

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsWarpRake< Tile_, Threads >::kWarpSize = 32
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpsStrided

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsWarpRake< Tile_, Threads >::kWarpsStrided = __NV_STD_MIN(kWarpCount, Tile::kH)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset-members.html new file mode 100644 index 00000000..e816dc74 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset, including all inherited members.

    + + +
    operator()() const    cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset    inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html new file mode 100644 index 00000000..cc0d9db2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <tile_traits_standard.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     Basic thread offset function computed from a thread shape. More...
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TiledThreadOffset-members.html b/docs/generated-html/structcutlass_1_1TiledThreadOffset-members.html new file mode 100644 index 00000000..bc28ca5d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TiledThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TiledThreadOffset< ThreadShape > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TiledThreadOffset< ThreadShape >, including all inherited members.

    + + +
    operator()() const    cutlass::TiledThreadOffset< ThreadShape >    inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TiledThreadOffset.html b/docs/generated-html/structcutlass_1_1TiledThreadOffset.html new file mode 100644 index 00000000..dbbccc1f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TiledThreadOffset.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::TiledThreadOffset< ThreadShape > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TiledThreadOffset< ThreadShape > Struct Template Reference
    +
    +
    + +

    Basic thread offset function computed from a thread shape. +

    + +

    #include <tile_traits_standard.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     Computes the logical coordinate from thread shape. More...
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename ThreadShape >
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::TiledThreadOffset< ThreadShape >::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter-members.html b/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter-members.html new file mode 100644 index 00000000..1bb156f6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TrivialPredicateTileAdapter Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TrivialPredicateTileAdapter, including all inherited members.

    + + + +
    at(int, int, int, int) const    cutlass::TrivialPredicateTileAdapter    inline
    TrivialPredicateTileAdapter()    cutlass::TrivialPredicateTileAdapter    inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter.html b/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter.html new file mode 100644 index 00000000..f93bb89c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter.html @@ -0,0 +1,183 @@ + + + + + + + +Cutlass: cutlass::TrivialPredicateTileAdapter Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TrivialPredicateTileAdapter Struct Reference
    +
    +
    + +

    Always returns true predicate. +

    + +

    #include <predicate_vector.h>

    + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE TrivialPredicateTileAdapter ()
     Ctor. More...
     
    CUTLASS_HOST_DEVICE bool at (int, int, int, int) const
     The value at location (d, h, w, c). More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ TrivialPredicateTileAdapter()

    + +
    +
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TrivialPredicateTileAdapter::TrivialPredicateTileAdapter ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at()

    + +
    +
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::TrivialPredicateTileAdapter::at (int ,
    int ,
    int ,
    int  
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits-members.html b/docs/generated-html/structcutlass_1_1VectorTraits-members.html new file mode 100644 index 00000000..011de7e9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::VectorTraits< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::VectorTraits< T >, including all inherited members.

    + + + + + +
    IsVector    cutlass::VectorTraits< T >    static
    kLanes    cutlass::VectorTraits< T >    static
    Scalar typedef    cutlass::VectorTraits< T >
    Vector typedef    cutlass::VectorTraits< T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits.html b/docs/generated-html/structcutlass_1_1VectorTraits.html new file mode 100644 index 00000000..80070a5b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits.html @@ -0,0 +1,200 @@ + + + + + + + +Cutlass: cutlass::VectorTraits< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::VectorTraits< T > Struct Template Reference
    +
    +
    + +

    Traits describing properties of vectors and scalar-as-vectors. +

    + +

    #include <vector.h>

    + + + + + + + + +

    +Public Types

    typedef T Scalar
     Scalar type. More...
     
    typedef Vector< T, 1 > Vector
     Type that is always a vector. More...
     
    + + + + + + + +

    +Static Public Attributes

    static int const kLanes = 1
     Number of lanes of vector. More...
     
    static bool const IsVector = false
     True if the type is actually a cutlass::Vector, otherwise false. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef T cutlass::VectorTraits< T >::Scalar
    +
    + +
    +
    + +

    ◆ Vector

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef Vector<T, 1> cutlass::VectorTraits< T >::Vector
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ IsVector

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + +
    bool const cutlass::VectorTraits< T >::IsVector = false
    +
    +static
    +
    + +
    +
    + +

    ◆ kLanes

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + +
    int const cutlass::VectorTraits< T >::kLanes = 1
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html new file mode 100644 index 00000000..e5e2d780 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::VectorTraits< Vector< T, Lanes > > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html new file mode 100644 index 00000000..39561291 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html @@ -0,0 +1,200 @@ + + + + + + + +Cutlass: cutlass::VectorTraits< Vector< T, Lanes > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::VectorTraits< Vector< T, Lanes > > Struct Template Reference
    +
    +
    + +

    Partial specialization for actual cutlass::Vector. +

    + +

    #include <vector.h>

    + + + + + + + + +

    +Public Types

    typedef T Scalar
     Scalar type. More...
     
    typedef Vector< T, Lanes > Vector
     Type that is always a Vector. More...
     
    + + + + + + + +

    +Static Public Attributes

    static int const kLanes = Lanes
     Number of lanes of vector. More...
     
    static bool const IsVector = true
     Type is actually a cutlass::Vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + +
    typedef T cutlass::VectorTraits< Vector< T, Lanes > >::Scalar
    +
    + +
    +
    + +

    ◆ Vector

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + +
    typedef Vector<T, Lanes> cutlass::VectorTraits< Vector< T, Lanes > >::Vector
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ IsVector

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    bool const cutlass::VectorTraits< Vector< T, Lanes > >::IsVector = true
    +
    +static
    +
    + +
    +
    + +

    ◆ kLanes

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    int const cutlass::VectorTraits< Vector< T, Lanes > >::kLanes = Lanes
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html new file mode 100644 index 00000000..a038a431 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::VectorTraits< Vector< T, Lanes > const > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html new file mode 100644 index 00000000..7f9a5743 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html @@ -0,0 +1,200 @@ + + + + + + + +Cutlass: cutlass::VectorTraits< Vector< T, Lanes > const > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::VectorTraits< Vector< T, Lanes > const > Struct Template Reference
    +
    +
    + +

    Partial specialization for actual cutlass::Vector. +

    + +

    #include <vector.h>

    + + + + + + + + +

    +Public Types

    typedef T Scalar
     Scalar type. More...
     
    typedef Vector< T, Lanes > Vector
     Type that is always a Vector. More...
     
    + + + + + + + +

    +Static Public Attributes

    static int const kLanes = Lanes
     Number of lanes of vector. More...
     
    static bool const IsVector = true
     Type is actually a cutlass::Vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + +
    typedef T cutlass::VectorTraits< Vector< T, Lanes > const >::Scalar
    +
    + +
    +
    + +

    ◆ Vector

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + +
    typedef Vector<T, Lanes> cutlass::VectorTraits< Vector< T, Lanes > const >::Vector
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ IsVector

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    bool const cutlass::VectorTraits< Vector< T, Lanes > const >::IsVector = true
    +
    +static
    +
    + +
    +
    + +

    ◆ kLanes

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    int const cutlass::VectorTraits< Vector< T, Lanes > const >::kLanes = Lanes
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Vectorize-members.html b/docs/generated-html/structcutlass_1_1Vectorize-members.html new file mode 100644 index 00000000..2f3903bd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Vectorize-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Vectorize< Element_, kLanes_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Vectorize< Element_, kLanes_ >, including all inherited members.

    + + +
    Type typedef cutlass::Vectorize< Element_, kLanes_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Vectorize.html b/docs/generated-html/structcutlass_1_1Vectorize.html new file mode 100644 index 00000000..d728c0a2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Vectorize.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::Vectorize< Element_, kLanes_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Vectorize< Element_, kLanes_ > Struct Template Reference
    +
    +
    + +

    #include <vector.h>

    + + + + +

    +Public Types

    typedef Vector< Element_, kLanes_ > Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    +
    +template<typename Element_, int kLanes_ = 1>
    + + + + +
    typedef Vector<Element_, kLanes_> cutlass::Vectorize< Element_, kLanes_ >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4-members.html b/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4-members.html new file mode 100644 index 00000000..8f2e996d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Vectorize< Element_, 1 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Vectorize< Element_, 1 >, including all inherited members.

    + + +
    Type typedef cutlass::Vectorize< Element_, 1 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html b/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html new file mode 100644 index 00000000..ca9f3310 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::Vectorize< Element_, 1 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Vectorize< Element_, 1 > Struct Template Reference
    +
    +
    + +

    #include <vector.h>

    + + + + +

    +Public Types

    typedef Element_ Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    +
    +template<typename Element_ >
    + + + + +
    typedef Element_ cutlass::Vectorize< Element_, 1 >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1divide__assert-members.html b/docs/generated-html/structcutlass_1_1divide__assert-members.html new file mode 100644 index 00000000..59e5af79 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1divide__assert-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::divide_assert< Dividend, Divisor > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::divide_assert< Dividend, Divisor >, including all inherited members.

    + + +
    value enum value cutlass::divide_assert< Dividend, Divisor >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1divide__assert.html b/docs/generated-html/structcutlass_1_1divide__assert.html new file mode 100644 index 00000000..f7dd6690 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1divide__assert.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: cutlass::divide_assert< Dividend, Divisor > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::divide_assert< Dividend, Divisor > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + +

    +Public Types

    enum  { value = Dividend / Divisor + }
     
    +

    Detailed Description

    +

    template<int Dividend, int Divisor>
    +struct cutlass::divide_assert< Dividend, Divisor >

    + +

    For performing a constant-division with a compile-time assertion that the Divisor evenly-divides the Dividend.

    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int Dividend, int Divisor>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators-members.html new file mode 100644 index 00000000..c3f2e3e9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >, including all inherited members.

    + + + +
    clear(Fragment_ &fragment) cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ > inline
    ClearAccumulators(SharedStorage &shared_storage) cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ > inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators.html b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators.html new file mode 100644 index 00000000..e815e57d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ > Struct Template Reference
    +
    +
    + +

    #include <clear_accumulators.h>

    + + + + + +

    +Classes

    struct  SharedStorage
     The shared storage. More...
     
    + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE ClearAccumulators (SharedStorage &shared_storage)
     Ctor. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void clear (Fragment_ &fragment)
     Clear the fragment. More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ ClearAccumulators()

    + +
    +
    +
    +template<typename Scalar_ , int kLanes_ = 1>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::ClearAccumulators (SharedStorage & shared_storage)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ clear()

    + +
    +
    +
    +template<typename Scalar_ , int kLanes_ = 1>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::clear (Fragment_ & fragment)
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators_1_1SharedStorage.html b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators_1_1SharedStorage.html new file mode 100644 index 00000000..b97be88f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators_1_1SharedStorage.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorage Struct Reference
    +
    +
    + +

    The shared storage. +

    + +

    #include <clear_accumulators.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig-members.html new file mode 100644 index 00000000..256b383d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::DgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::DgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    InstructionShape typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kStagescutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kThreadscutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kWarpSizecutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    OutputTile typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    ScalarA typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    ScalarB typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    ScalarC typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    ScalarD typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    Warps typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.html b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.html new file mode 100644 index 00000000..7ac04112 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.html @@ -0,0 +1,177 @@ + + + + + + + +Cutlass: cutlass::gemm::DgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::DgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference
    +
    +
    + +

    #include <dgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::DgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >:
    +
    +
    + + +cutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    typedef double ScalarA
     The scalar for A. More...
     
    typedef double ScalarB
     The scalar for B. More...
     
    typedef double ScalarC
     The scalar for C. More...
     
    typedef double ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double > MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    static int const kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads
     The number of threads. More...
     
    static int const kScalarsPerLdgA
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA
     
    static int const kScalarsPerLdsA
     
    static int const kScalarsPerLdgB
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB
     
    static int const kScalarsPerLdsB
     
    static int const kScalarsPerLdgC
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD
     
    static int const kScalarsPerLdsD
     
    static int const kAccumulatorsPerLdsA
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB
     
    static int const kStages
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.png b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.png new file mode 100644 index 00000000..0769b899 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits-members.html new file mode 100644 index 00000000..2fd59e58 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits-members.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Epilogue typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GemmConfig typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Index typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    kLayoutAcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    kLayoutBcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    OutputTile typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarC typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarD typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    shared_load_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    shared_store_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.html new file mode 100644 index 00000000..2832466c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Struct Template Reference
    +
    +
    + +

    #include <dgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ >:
    +
    +
    + + +cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, GemmEpilogue< GemmEpilogueTraits_ >, Index_ > +cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    typedef GemmConfig_ GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA ::Scalar ScalarA
     The scalar for A. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB ::Scalar ScalarB
     The scalar for B. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef GemmEpilogue< GemmEpilogueTraits_ > Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef IdentityBlockSwizzle BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef ClearAccumulators< GemmConfig_::Accumulators::Element > ClearAccumulators
     Clear the accumulators. More...
     
    - Static Public Member Functions inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static MatrixLayout::Kind const kLayoutA
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB
     The layout of B. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.png new file mode 100644 index 00000000..151b3c5a Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd-members.html new file mode 100644 index 00000000..f03e26ac --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::FragmentMultiplyAdd< Scalar_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html new file mode 100644 index 00000000..bde87a6e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html @@ -0,0 +1,318 @@ + + + + + + + +Cutlass: cutlass::gemm::FragmentMultiplyAdd< Scalar_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::FragmentMultiplyAdd< Scalar_ > Struct Template Reference
    +
    +
    + +

    #include <fragment_multiply_add.h>

    + + + + + + + + + + + + + + +

    +Public Types

    typedef Shape< 1, 1, 1, 1 > InstructionShape
     The shape of the instruction. More...
     
    typedef Scalar_ ScalarA
     The type for A. More...
     
    typedef Scalar_ ScalarB
     The type for B. More...
     
    typedef Scalar_ ScalarC
     The type for C and D. More...
     
    + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE FragmentMultiplyAdd ()
     Ctor. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void multiply (Scalar_ a, Fragment_ const &b, Fragment_ &d)
     Multiply : d = a*b. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void multiply_add (Scalar_ a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
     Multiply-add : d = a*b + c. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InstructionShape

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + +
    typedef Shape<1, 1, 1, 1> cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::InstructionShape
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + +
    typedef Scalar_ cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + +
    typedef Scalar_ cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + +
    typedef Scalar_ cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::ScalarC
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ FragmentMultiplyAdd()

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::FragmentMultiplyAdd ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ multiply()

    + +
    +
    +
    +template<typename Scalar_ >
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::multiply (Scalar_ a,
    Fragment_ const & b,
    Fragment_ & d 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename Scalar_ >
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::multiply_add (Scalar_ a,
    Fragment_ const & b,
    Fragment_ const & c,
    Fragment_ & d 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4-members.html new file mode 100644 index 00000000..32953dab --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::FragmentMultiplyAdd< half > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html new file mode 100644 index 00000000..141354c2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html @@ -0,0 +1,304 @@ + + + + + + + +Cutlass: cutlass::gemm::FragmentMultiplyAdd< half > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::FragmentMultiplyAdd< half > Struct Template Reference
    +
    +
    + +

    #include <fragment_multiply_add.h>

    + + + + + + + + + + + + + + +

    +Public Types

    typedef Shape< 1, 1, 1, 1 > InstructionShape
     The shape of the instruction. More...
     
    typedef half ScalarA
     The type for A. More...
     
    typedef half ScalarB
     The type for B. More...
     
    typedef half ScalarC
     The type for C and D. More...
     
    + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE FragmentMultiplyAdd ()
     Ctor. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void multiply (half a, Fragment_ const &b, Fragment_ &d)
     Multiply : d = a*b. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void multiply_add (half a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
     Multiply-add : d = a*b + c. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InstructionShape

    + +
    +
    + + + + +
    typedef Shape<1, 1, 1, 1> cutlass::gemm::FragmentMultiplyAdd< half >::InstructionShape
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    + + + + +
    typedef half cutlass::gemm::FragmentMultiplyAdd< half >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    + + + + +
    typedef half cutlass::gemm::FragmentMultiplyAdd< half >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    + + + + +
    typedef half cutlass::gemm::FragmentMultiplyAdd< half >::ScalarC
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ FragmentMultiplyAdd()

    + +
    +
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::FragmentMultiplyAdd< half >::FragmentMultiplyAdd ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ multiply()

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::FragmentMultiplyAdd< half >::multiply (half a,
    Fragment_ const & b,
    Fragment_ & d 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::FragmentMultiplyAdd< half >::multiply_add (half a,
    Fragment_ const & b,
    Fragment_ const & c,
    Fragment_ & d 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1Gemm-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm-members.html new file mode 100644 index 00000000..f0424e29 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::Gemm< GemmTraits_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1Gemm.html b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm.html new file mode 100644 index 00000000..c2f993ef --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm.html @@ -0,0 +1,522 @@ + + + + + + + +Cutlass: cutlass::gemm::Gemm< GemmTraits_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::Gemm< GemmTraits_ > Struct Template Reference
    +
    +
    + +

    #include <gemm.h>

    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Gemm< GemmTraits_ > This_
     This class. More...
     
    typedef GemmTraits_ Traits
     The traits. More...
     
    typedef Traits::SharedStorage SharedStorage
     The shared storage. More...
     
    typedef Traits::ScalarA ScalarA
     The scalar for A. More...
     
    typedef Traits::ScalarB ScalarB
     The scalar for B. More...
     
    typedef Traits::Epilogue::Scalar ScalarEpilogue
     The scalar in the epilogue. More...
     
    typedef Traits::Epilogue::ScalarC ScalarC
     The scalar for C. More...
     
    typedef Traits::Epilogue::ScalarD ScalarD
     The scalar for D. More...
     
    typedef Traits::Index Index
     The index. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE Gemm (Params const &params_, SharedStorage &shared_storage_)
     Ctor. More...
     
    CUTLASS_DEVICE void multiply_add ()
     Do the GEMM. More...
     
    + + + + + + + +

    +Static Public Member Functions

    static __host__ cudaError_t launch (Params const &params, cudaStream_t stream=cudaStreamDefault)
     Launch the kernel. More...
     
    static __host__ cudaError_t launch (CUfunction kernel, Params const &params, CUstream stream=CU_STREAM_LEGACY)
     Launch the kernel. More...
     
    + + + + + + + +

    +Public Attributes

    Params const & params
     The params. More...
     
    SharedStorage & shared_storage
     The shared storage. More...
     
    + + + + +

    +Static Public Attributes

    static int const kThreads = Traits::GemmConfig::kThreads
     The number of threads. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Index

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::Index cutlass::gemm::Gemm< GemmTraits_ >::Index
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::ScalarA cutlass::gemm::Gemm< GemmTraits_ >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::ScalarB cutlass::gemm::Gemm< GemmTraits_ >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::Epilogue::ScalarC cutlass::gemm::Gemm< GemmTraits_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ScalarD

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::Epilogue::ScalarD cutlass::gemm::Gemm< GemmTraits_ >::ScalarD
    +
    + +
    +
    + +

    ◆ ScalarEpilogue

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::Epilogue::Scalar cutlass::gemm::Gemm< GemmTraits_ >::ScalarEpilogue
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::SharedStorage cutlass::gemm::Gemm< GemmTraits_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Gemm<GemmTraits_> cutlass::gemm::Gemm< GemmTraits_ >::This_
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef GemmTraits_ cutlass::gemm::Gemm< GemmTraits_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ Gemm()

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::Gemm< GemmTraits_ >::Gemm (Params const & params_,
    SharedStorage & shared_storage_ 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ launch() [1/2]

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    static __host__ cudaError_t cutlass::gemm::Gemm< GemmTraits_ >::launch (Params const & params,
    cudaStream_t stream = cudaStreamDefault 
    )
    +
    +inline static
    +
    + +
    +
    + +

    ◆ launch() [2/2]

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static __host__ cudaError_t cutlass::gemm::Gemm< GemmTraits_ >::launch (CUfunction kernel,
    Params const & params,
    CUstream stream = CU_STREAM_LEGACY 
    )
    +
    +inline static
    +
    + +
    +
    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::Gemm< GemmTraits_ >::multiply_add ()
    +
    +inline
    +
    +

    Define the mainloop iteration size

    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + +
    int const cutlass::gemm::Gemm< GemmTraits_ >::kThreads = Traits::GemmConfig::kThreads
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    Params const& cutlass::gemm::Gemm< GemmTraits_ >::params
    +
    + +
    +
    + +

    ◆ shared_storage

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    SharedStorage& cutlass::gemm::Gemm< GemmTraits_ >::shared_storage
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig-members.html new file mode 100644 index 00000000..18c258d7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    InstructionShape typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kStagescutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kThreadscutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kWarpSizecutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    OutputTile typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    ScalarA typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    ScalarB typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    ScalarC typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    ScalarD typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    Warps typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig.html new file mode 100644 index 00000000..3bc9b65f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig.html @@ -0,0 +1,693 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef ScalarA_ ScalarA
     The scalar for A. More...
     
    typedef ScalarB_ ScalarB
     The scalar for B. More...
     
    typedef ScalarC_ ScalarC
     The scalar for C. More...
     
    typedef ScalarD_ ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef MultiplyAdd_ MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kWarpSize = cutlass::kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
     The number of threads. More...
     
    static int const kScalarsPerLdgA = kScalarsPerLdgA_
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA = kScalarsPerStsA_
     
    static int const kScalarsPerLdsA = kScalarsPerLdsA_
     
    static int const kScalarsPerLdgB = kScalarsPerLdgB_
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB = kScalarsPerStsB_
     
    static int const kScalarsPerLdsB = kScalarsPerLdsB_
     
    static int const kScalarsPerLdgC = kScalarsPerLdgCAndStgD_
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD = kScalarsPerLdgCAndStgD_
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD = kScalarsPerStsD_
     
    static int const kScalarsPerLdsD = kScalarsPerLdsD_
     
    static int const kAccumulatorsPerLdsA = kScalarsPerLdsA / InstructionShape::kD
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB = kScalarsPerLdsB / InstructionShape::kD
     
    static int const kStages = kStages_
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef MultiplyAdd::Accumulators cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::Accumulators
    +
    + +
    +
    + +

    ◆ AccumulatorsPerWarp

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef MultiplyAdd::AccumulatorsPerWarp cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::AccumulatorsPerWarp
    +
    + +
    +
    + +

    ◆ InstructionShape

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef MultiplyAdd::InstructionShape cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::InstructionShape
    +
    + +
    +
    + +

    ◆ MultiplyAdd

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef MultiplyAdd_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::MultiplyAdd
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::OutputTile
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef ScalarA_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef ScalarB_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef ScalarC_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ScalarD

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef ScalarD_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::ScalarD
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef ShapeDiv<OutputTile, AccumulatorsPerWarp>::Shape cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccumulatorsPerLdsA

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kAccumulatorsPerLdsA = kScalarsPerLdsA / InstructionShape::kD
    +
    +static
    +
    + +
    +
    + +

    ◆ kAccumulatorsPerLdsB

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kAccumulatorsPerLdsB = kScalarsPerLdsB / InstructionShape::kD
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdgA

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdgA = kScalarsPerLdgA_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdgB

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdgB = kScalarsPerLdgB_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdgC

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdgC = kScalarsPerLdgCAndStgD_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdsA

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdsA = kScalarsPerLdsA_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdsB

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdsB = kScalarsPerLdsB_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdsD

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdsD = kScalarsPerLdsD_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerStgD

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerStgD = kScalarsPerLdgCAndStgD_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerStsA

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerStsA = kScalarsPerStsA_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerStsB

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerStsB = kScalarsPerStsB_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerStsD

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerStsD = kScalarsPerStsD_
    +
    +static
    +
    + +
    +
    + +

    ◆ kStages

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kStages = kStages_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpSize

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kWarpSize = cutlass::kWarpSize
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc-members.html new file mode 100644 index 00000000..5c3b045a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc-members.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmDesc< Scalar_, Index_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc.html new file mode 100644 index 00000000..9f4c8fd0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc.html @@ -0,0 +1,344 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmDesc< Scalar_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmDesc< Scalar_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Index_ m
     The dimensions of the GEMM. More...
     
    Index_ n
     
    Index_ k
     
    Scalar_ alpha
     The alpha/beta scaling values. More...
     
    Scalar_ beta
     
    void const * d_a
     The source matrix A. More...
     
    Index_ lda
     The stride for A. More...
     
    void const * d_b
     The source matrix B. More...
     
    Index_ ldb
     The stride for B. More...
     
    void const * d_c
     The source matrix C. More...
     
    Index_ ldc
     The stride for C. More...
     
    void * d_d
     The destination matrix D. More...
     
    Index_ ldd
     The stride for D. More...
     
    +

    Member Data Documentation

    + +

    ◆ alpha

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Scalar_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::alpha
    +
    + +
    +
    + +

    ◆ beta

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Scalar_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::beta
    +
    + +
    +
    + +

    ◆ d_a

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    void const* cutlass::gemm::GemmDesc< Scalar_, Index_ >::d_a
    +
    + +
    +
    + +

    ◆ d_b

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    void const* cutlass::gemm::GemmDesc< Scalar_, Index_ >::d_b
    +
    + +
    +
    + +

    ◆ d_c

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    void const* cutlass::gemm::GemmDesc< Scalar_, Index_ >::d_c
    +
    + +
    +
    + +

    ◆ d_d

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    void* cutlass::gemm::GemmDesc< Scalar_, Index_ >::d_d
    +
    + +
    +
    + +

    ◆ k

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::k
    +
    + +
    +
    + +

    ◆ lda

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::lda
    +
    + +
    +
    + +

    ◆ ldb

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::ldb
    +
    + +
    +
    + +

    ◆ ldc

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::ldc
    +
    + +
    +
    + +

    ◆ ldd

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::ldd
    +
    + +
    +
    + +

    ◆ m

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::m
    +
    + +
    +
    + +

    ◆ n

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::n
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue-members.html new file mode 100644 index 00000000..6024711b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue-members.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    epilogue(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    epilogue_with_or_without_beta(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    Functor typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GemmEpilogue(Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Index typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Iterations typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    mcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ncutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    OutputTile typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    paramscutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Params typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Scalar typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_load_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    shared_storagecutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_store_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedLoadTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStorage typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Traits typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.html new file mode 100644 index 00000000..3f08c9cf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.html @@ -0,0 +1,755 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_epilogue.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >:
    +
    +
    + + +cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool > +cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmEpilogueTraits_ Traits
     The traits class. More...
     
    typedef Traits::Params Params
     The params. More...
     
    typedef Traits::SharedStorage SharedStorage
     The shared storage. More...
     
    typedef Traits::OutputTile OutputTile
     The output tile. More...
     
    typedef Traits::Iterations Iterations
     The number of iterations. More...
     
    typedef Traits::Accumulators Accumulators
     The accumulators. More...
     
    typedef Traits::Scalar Scalar
     The scalar. More...
     
    typedef Traits::Functor Functor
     The functor in charge of the math. More...
     
    typedef Traits::GlobalLoadIteratorC GlobalLoadIteratorC
     The iterator for C in global memory. More...
     
    typedef Traits::GlobalTransformerC GlobalTransformerC
     The transformer for C. More...
     
    typedef Traits::GlobalTransformerD GlobalTransformerD
     The transformer for D. More...
     
    typedef Traits::GlobalStoreIteratorD GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef Traits::SharedStoreIteratorD SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef Traits::SharedStoreTransformerD SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef Traits::SharedLoadIteratorD SharedLoadIteratorD
     The iterator to load D in shared memory. More...
     
    typedef Copy< typename SharedLoadIteratorD::Fragment > SharedLoadTransformerD
     The shared load transformer for D. More...
     
    typedef Traits::Index Index
     The index. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GemmEpilogue (Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)
     Ctor. More...
     
    CUTLASS_DEVICE void epilogue (Coord< 3 > const &block, Accumulators &accumulators)
     Execute the epilogue. More...
     
    template<bool kBetaIsZero_>
    CUTLASS_DEVICE void epilogue_with_or_without_beta (Coord< 3 > const &block, Accumulators &accumulators)
     
    CUTLASS_DEVICE void shared_load_fence ()
     The memory fence for shared loads. More...
     
    CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    + + + + + + + + + + + + +

    +Public Attributes

    Params const & params
     The params. More...
     
    SharedStorage & shared_storage
     The shared storage. More...
     
    Index m
     The dimensions of the GEMM. More...
     
    Index n
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Accumulators cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Accumulators
    +
    + +
    +
    + +

    ◆ Functor

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Functor cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Functor
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorC

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::GlobalLoadIteratorC cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalLoadIteratorC
    +
    +

    The iterator for C in global memory.

    + +
    +
    + +

    ◆ GlobalStoreIteratorD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::GlobalStoreIteratorD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalStoreIteratorD
    +
    + +
    +
    + +

    ◆ GlobalTransformerC

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::GlobalTransformerC cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalTransformerC
    +
    + +
    +
    + +

    ◆ GlobalTransformerD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::GlobalTransformerD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalTransformerD
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Index cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Index
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Iterations cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::OutputTile cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Params

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Params cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Params
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Scalar cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Scalar
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef GlobalLoadIteratorC::Scalar cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ScalarD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef GlobalStoreIteratorD::Scalar cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::ScalarD
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::SharedLoadIteratorD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedLoadIteratorD
    +
    + +
    +
    + +

    ◆ SharedLoadTransformerD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Copy<typename SharedLoadIteratorD::Fragment> cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedLoadTransformerD
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::SharedStorage cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::SharedStoreIteratorD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedStoreIteratorD
    +
    + +
    +
    + +

    ◆ SharedStoreTransformerD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::SharedStoreTransformerD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedStoreTransformerD
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef GemmEpilogueTraits_ cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GemmEpilogue()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GemmEpilogue (Params const & params_,
    SharedStorage & shared_storage_,
    Index m_,
    Index n_ 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ epilogue()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::epilogue (Coord< 3 > const & block,
    Accumulators & accumulators 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ epilogue_with_or_without_beta()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    +
    +template<bool kBetaIsZero_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::epilogue_with_or_without_beta (Coord< 3 > const & block,
    Accumulators & accumulators 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ shared_load_fence()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::shared_load_fence ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ shared_store_fence()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::shared_store_fence ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ m

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    Index cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::m
    +
    + +
    +
    + +

    ◆ n

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    Index cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::n
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    Params const& cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::params
    +
    + +
    +
    + +

    ◆ shared_storage

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    SharedStorage& cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::shared_storage
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.png new file mode 100644 index 00000000..25ed6dab Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits-members.html new file mode 100644 index 00000000..2035e3bf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    Delta typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    Functor typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    Index typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    Iterations typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    OutputTile typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    Scalar typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    ScalarC typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    ScalarD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits.html new file mode 100644 index 00000000..8d99223d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits.html @@ -0,0 +1,418 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_epilogue_traits.h>

    + + + + + + + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    struct  SharedStorage
     The shared memory to swizzle the data in the epilogue. More...
     
    union  StreamSharedStorage
     The shared memory storage to exchange data. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef OutputTile_ OutputTile
     The output tile. More...
     
    typedef Accumulators_ Accumulators
     
    typedef GlobalLoadIteratorC_ GlobalLoadIteratorC
     The iterator for C in global memory. More...
     
    typedef GlobalTransformerC_ GlobalTransformerC
     The transformer for C. More...
     
    typedef GlobalTransformerD_ GlobalTransformerD
     The transformer for D. More...
     
    typedef GlobalStoreIteratorD_ GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef SharedStoreIteratorD_ SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef SharedStoreTransformerD_ SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef SharedLoadIteratorD_ SharedLoadIteratorD
     The iterator to load D in shared memory. More...
     
    typedef Iterations_ Iterations
     The number of iterations. More...
     
    typedef Delta_ Delta
     The iterations strides. More...
     
    typedef Functor_ Functor
     The functor in charge of the math. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef Functor::Scalar Scalar
     The scalar. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Accumulators_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Accumulators
    +
    +

    The accumulators.

    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Delta_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Delta
    +
    + +
    +
    + +

    ◆ Functor

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Functor_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Functor
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorC

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalLoadIteratorC_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::GlobalLoadIteratorC
    +
    + +
    +
    + +

    ◆ GlobalStoreIteratorD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalStoreIteratorD_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::GlobalStoreIteratorD
    +
    + +
    +
    + +

    ◆ GlobalTransformerC

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalTransformerC_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::GlobalTransformerC
    +
    + +
    +
    + +

    ◆ GlobalTransformerD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalTransformerD_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::GlobalTransformerD
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Index_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Index
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Iterations_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Functor::Scalar cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Scalar
    +
    +

    The scalar.

    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalLoadIteratorC::Scalar cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ScalarD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalStoreIteratorD::Scalar cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::ScalarD
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef SharedLoadIteratorD_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedLoadIteratorD
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef SharedStoreIteratorD_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStoreIteratorD
    +
    + +
    +
    + +

    ◆ SharedStoreTransformerD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef SharedStoreTransformerD_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStoreTransformerD
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper-members.html new file mode 100644 index 00000000..4f04d91f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    Functor typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalLoadTileTraits typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalStoreTileTraits typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    Iterations typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    OutputTile typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    Scalar typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    SharedLoadTileTraits typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreTileTraits typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html new file mode 100644 index 00000000..55d6652f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html @@ -0,0 +1,403 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_epilogue_traits.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef EpilogueFunctor_::Scalar Scalar
     The scalar. More...
     
    typedef GemmConfig_::OutputTile OutputTile
     The output tile. More...
     
    typedef Shape< 1, GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH/GemmConfig_::kAccumulatorsPerLdsB, GemmConfig_::kAccumulatorsPerLdsB > Iterations
     The number of iterations in the epilogue. More...
     
    typedef Shape< 0, GemmConfig_::kAccumulatorsPerLdsB *(GemmConfig_::Warps::kH *GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1), 0 > Delta
     
    typedef EpilogueFunctor_ Functor
     The functor to do the math in the epilogue. More...
     
    typedef GemmSharedStoreTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::kScalarsPerStsD, 128/sizeof(typename GemmConfig_::ScalarD)/GemmConfig_::kScalarsPerStsD/2 *GemmConfig_::kScalarsPerStsD > SharedStoreTileTraits
     The traits class to build the iterator to store to shared memory for D. More...
     
    typedef TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorD
     The iterator to store D to shared memory. More...
     
    typedef Copy< typename SharedStoreIteratorD::Fragment > SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef GemmSharedLoadTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::kScalarsPerLdsD, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for D. More...
     
    typedef TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorD
     The iterator to load D from shared memory. More...
     
    typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarC const, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerLdgC > GlobalLoadTileTraits
     The traits class to build the iterator to load data from global memory for C^N. More...
     
    typedef GemmGlobalIteratorCd< GlobalLoadTileTraits, Index_ > GlobalLoadIteratorC
     The iterator to load C. More...
     
    typedef Copy< typename GlobalLoadIteratorC::Fragment > GlobalTransformerC
     The transformer for C. More...
     
    typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarD, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerStgD > GlobalStoreTileTraits
     The traits class to build the iterator to store data to global memory for D^N. More...
     
    typedef GemmGlobalIteratorCd< GlobalStoreTileTraits, Index_ > GlobalStoreIteratorD
     The iterator to store D. More...
     
    typedef Copy< typename GlobalStoreIteratorD::Fragment > GlobalTransformerD
     The transformer for D. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef Shape<0, GemmConfig_::kAccumulatorsPerLdsB*( GemmConfig_::Warps::kH* GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1), 0> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Delta
    +
    + +
    +
    + +

    ◆ Functor

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef EpilogueFunctor_ cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Functor
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorC

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorCd<GlobalLoadTileTraits, Index_> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadIteratorC
    +
    + +
    +
    + +

    ◆ GlobalLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarC const, Shape<1, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, Iterations::kW, GemmConfig_::kScalarsPerLdgC> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadTileTraits
    +
    + +
    +
    + +

    ◆ GlobalStoreIteratorD

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorCd<GlobalStoreTileTraits, Index_> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreIteratorD
    +
    + +
    +
    + +

    ◆ GlobalStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarD, Shape<1, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, Iterations::kW, GemmConfig_::kScalarsPerStgD> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreTileTraits
    +
    + +
    +
    + +

    ◆ GlobalTransformerC

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef Copy<typename GlobalLoadIteratorC::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerC
    +
    + +
    +
    + +

    ◆ GlobalTransformerD

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef Copy<typename GlobalStoreIteratorD::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerD
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef Shape<1, GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH / GemmConfig_::kAccumulatorsPerLdsB, GemmConfig_::kAccumulatorsPerLdsB> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmConfig_::OutputTile cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef EpilogueFunctor_::Scalar cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorD

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadIteratorD
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmSharedLoadTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::kScalarsPerLdsD, SharedStoreTileTraits::kSkew> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorD

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef TileStoreIterator<SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreIteratorD
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmSharedStoreTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::kScalarsPerStsD, 128 / sizeof(typename GemmConfig_::ScalarD) / GemmConfig_::kScalarsPerStsD / 2 * GemmConfig_::kScalarsPerStsD> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTransformerD

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef Copy<typename SharedStoreIteratorD::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTransformerD
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params-members.html new file mode 100644 index 00000000..b6a1ec78 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params, including all inherited members.

    + + + + + + + + + +
    functorcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    initialize(GemmDesc_ const &desc)cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Paramsinline
    iterator_ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    iterator_dcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    shared_load_iterator_dcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    shared_store_iterator_dcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    stride_hcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    stride_wcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html new file mode 100644 index 00000000..c94e55e0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html @@ -0,0 +1,274 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_epilogue_traits.h>

    + + + + + + +

    +Public Member Functions

    template<typename GemmDesc_ >
    CUTLASS_HOST_DEVICE int initialize (GemmDesc_ const &desc)
     Setup the params. More...
     
    + + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Index stride_h
     The strides for H and W in the different iterations of the epilogue. More...
     
    Index stride_w
     
    GlobalLoadIteratorC::Params iterator_c
     The params for the C iterator. More...
     
    GlobalStoreIteratorD::Params iterator_d
     The params for the D global iterator. More...
     
    SharedStoreIteratorD::Params shared_store_iterator_d
     The params for the D shared store iterator. More...
     
    SharedLoadIteratorD::Params shared_load_iterator_d
     The params for the D shared load iterator. More...
     
    Functor::Params functor
     The functor params. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    +
    +template<typename GemmDesc_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::initialize (GemmDesc_ const & desc)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ functor

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    Functor::Params cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::functor
    +
    + +
    +
    + +

    ◆ iterator_c

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    GlobalLoadIteratorC::Params cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::iterator_c
    +
    + +
    +
    + +

    ◆ iterator_d

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    GlobalStoreIteratorD::Params cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::iterator_d
    +
    + +
    +
    + +

    ◆ shared_load_iterator_d

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    SharedLoadIteratorD::Params cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::shared_load_iterator_d
    +
    + +
    +
    + +

    ◆ shared_store_iterator_d

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    SharedStoreIteratorD::Params cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::shared_store_iterator_d
    +
    + +
    +
    + +

    ◆ stride_h

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::stride_h
    +
    + +
    +
    + +

    ◆ stride_w

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::stride_w
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage-members.html new file mode 100644 index 00000000..4856ef61 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html new file mode 100644 index 00000000..4cad48c7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage Struct Reference
    +
    +
    + +

    The shared memory to swizzle the data in the epilogue. +

    + +

    #include <gemm_epilogue_traits.h>

    + + + + +

    +Public Attributes

    StreamSharedStorage shared_stream
     
    +

    Member Data Documentation

    + +

    ◆ shared_stream

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    StreamSharedStorage cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage::shared_stream
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream-members.html new file mode 100644 index 00000000..baa0c5cf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream-members.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmFragmentStream< Traits_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmFragmentStream< Traits_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Base typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    commit() cutlass::gemm::GemmFragmentStream< Traits_ > inline
    convert cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    Convert typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    fetch cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    Fragment typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    FragmentStream() cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > > inline
    FragmentStream(Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0)) cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > > inline
    GemmFragmentStream() cutlass::gemm::GemmFragmentStream< Traits_ > inline
    GemmFragmentStream(Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0)) cutlass::gemm::GemmFragmentStream< Traits_ > inline
    Index typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    initialize_predicates(Coord< 3 > const &bounds, Coord< 3 > const &block_offset) cutlass::gemm::GemmFragmentStream< Traits_ > inline
    load() cutlass::gemm::GemmFragmentStream< Traits_ > inline
    load_iterator cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    LoadIterator typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    Pointer typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    predicates cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    residue(Coord< 3 > const &bounds, Coord< 3 > const &block_offset) cutlass::gemm::GemmFragmentStream< Traits_ > inline
    Scalar typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    shared_store_fence() cutlass::gemm::GemmFragmentStream< Traits_ > inline static
    SharedStoreStorage typedef cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    Storage typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    store_iterator cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    StoreFragment typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    StoreIterator typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    Traits typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.html new file mode 100644 index 00000000..72f8e314 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.html @@ -0,0 +1,652 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmFragmentStream< Traits_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmFragmentStream< Traits_ > Struct Template Reference
    +
    +
    + +

    GEMM Fragment Stream. +

    + +

    #include <gemm_fragment_stream.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmFragmentStream< Traits_ >:
    +
    +
    + + +cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > > + +
    + + + + + +

    +Classes

    struct  Params
     Parameters object. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Traits_ Traits
     Traits. More...
     
    typedef Traits::FragmentStream Base
     Base class. More...
     
    typedef Traits::Scalar Scalar
     Scalar type. More...
     
    typedef Base::LoadIterator LoadIterator
     Defines the load iterator. More...
     
    typedef Base::StoreIterator StoreIterator
     Defines the store iterator. More...
     
    typedef Base::Convert Convert
     Converts between tiles. More...
     
    typedef Base::Fragment Fragment
     Loaded fragment type. More...
     
    typedef Base::StoreFragment StoreFragment
     Stored fragment type. More...
     
    typedef Base::Storage Storage
     Destination storage. More...
     
    typedef Traits::Index Index
     Index type. More...
     
    typedef Traits::Scalar const * Pointer
     The pointer. More...
     
    - Public Types inherited from cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    typedef Traits_ Traits
     Defines traits of WMMA GEMM tile stream. More...
     
    typedef TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index > LoadIterator
     Defines the load iterator. More...
     
    typedef TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > StoreIterator
     Defines the store iterator. More...
     
    typedef FragmentCopy< typename TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > ::Fragment, typename TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index > ::Fragment > Convert
     Converts between tiles. More...
     
    typedef int Index
     Index type. More...
     
    typedef LoadIterator::Fragment Fragment
     Loaded fragment type. More...
     
    typedef StoreIterator::Fragment StoreFragment
     Stored fragment type. More...
     
    typedef StoreIterator::Storage Storage
     Destination storage. More...
     
    typedef StoreIterator::Storage SharedStoreStorage
     The storage in shared memory. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GemmFragmentStream ()
     
    CUTLASS_DEVICE GemmFragmentStream (Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Constructor - bounds and block offset are aligned to GEMM coordinates (K, N, M) More...
     
    CUTLASS_DEVICE void load ()
     Loads the fragment. More...
     
    CUTLASS_DEVICE void commit ()
     Commits the fragment. More...
     
    CUTLASS_DEVICE void residue (Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
     TODO - Recomputes predicates and clears fetch registers. More...
     
    CUTLASS_DEVICE void initialize_predicates (Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
     Recomputes predicates aligned to GEMM coordinates (K, N, M) More...
     
    - Public Member Functions inherited from cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    CUTLASS_DEVICE FragmentStream ()
     
    CUTLASS_DEVICE FragmentStream (Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Constructor. More...
     
    CUTLASS_DEVICE void load ()
     Loads the fragment. More...
     
    CUTLASS_DEVICE void commit ()
     Commits the fragment. More...
     
    CUTLASS_DEVICE void initialize_predicates (Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
     Recomputes predicates. More...
     
    + + + + + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    - Static Public Member Functions inherited from cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    static CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Attributes inherited from cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    LoadIterator load_iterator
     Loads fragment from global memory. More...
     
    LoadIterator::PredicateVector predicates
     Predicate vector. More...
     
    StoreIterator store_iterator
     Stores fragment to shared memory. More...
     
    Fragment fetch
     Fragment fetched by load iterator. More...
     
    Convert convert
     Converts between load fragments and store fragments. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Traits::FragmentStream cutlass::gemm::GemmFragmentStream< Traits_ >::Base
    +
    + +
    +
    + +

    ◆ Convert

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::Convert cutlass::gemm::GemmFragmentStream< Traits_ >::Convert
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::Fragment cutlass::gemm::GemmFragmentStream< Traits_ >::Fragment
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Traits::Index cutlass::gemm::GemmFragmentStream< Traits_ >::Index
    +
    + +
    +
    + +

    ◆ LoadIterator

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::LoadIterator cutlass::gemm::GemmFragmentStream< Traits_ >::LoadIterator
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Traits::Scalar const* cutlass::gemm::GemmFragmentStream< Traits_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Traits::Scalar cutlass::gemm::GemmFragmentStream< Traits_ >::Scalar
    +
    + +
    +
    + +

    ◆ Storage

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::Storage cutlass::gemm::GemmFragmentStream< Traits_ >::Storage
    +
    + +
    +
    + +

    ◆ StoreFragment

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::StoreFragment cutlass::gemm::GemmFragmentStream< Traits_ >::StoreFragment
    +
    + +
    +
    + +

    ◆ StoreIterator

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::StoreIterator cutlass::gemm::GemmFragmentStream< Traits_ >::StoreIterator
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Traits_ cutlass::gemm::GemmFragmentStream< Traits_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GemmFragmentStream() [1/2]

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmFragmentStream< Traits_ >::GemmFragmentStream ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ GemmFragmentStream() [2/2]

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmFragmentStream< Traits_ >::GemmFragmentStream (Params const & params,
    Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0) 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmFragmentStream< Traits_ >::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmFragmentStream< Traits_ >::initialize_predicates (Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ load()

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmFragmentStream< Traits_ >::load ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ residue()

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmFragmentStream< Traits_ >::residue (Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ shared_store_fence()

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + +
    static CUTLASS_DEVICE void cutlass::gemm::GemmFragmentStream< Traits_ >::shared_store_fence ()
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.png new file mode 100644 index 00000000..c217a06c Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStreamTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStreamTraits-members.html new file mode 100644 index 00000000..8cc6a106 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStreamTraits-members.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + +
    DestinationSkew typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    FragmentStream typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    Index typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    kAccessSize cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > static
    kLayout cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > static
    kThreads cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > static
    kUsage cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > static
    MultiplicandTraits typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    Scalar typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    ScalarTile typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    ThreadBlockTile typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    TileTraits typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    VectorizedTile typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStreamTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStreamTraits.html new file mode 100644 index 00000000..861ce1d1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStreamTraits.html @@ -0,0 +1,387 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > Struct Template Reference
    +
    +
    + +

    Defines a FragmentStream by mapping GEMM dimensions onto contiguous and strided dimensions. +

    + +

    #include <gemm_fragment_stream.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Scalar_ Scalar
     Scalar data type. More...
     
    typedef ThreadBlockTile_ ThreadBlockTile
     Shape of the thread block tile (K, N, M) More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef ShapeDiv< DestinationSkew_, Shape< ScalarsPerInst, ScalarsPerInst, ScalarsPerInst, 1 > >::Shape DestinationSkew
     Skew added to shared memory tile. More...
     
    typedef GemmMultiplicandTraits< ThreadBlockTile, kUsage, kLayout > MultiplicandTraits
     Traits of multiplicand. More...
     
    typedef MultiplicandTraits::Shape ScalarTile
     Scalar tile shape. More...
     
    typedef ReshapeTile< ScalarTile, kAccessSize >::Tile VectorizedTile
     Reshape for vectorized access. More...
     
    typedef TileTraitsDefault< VectorizedTile, kThreads > TileTraits
     Define structure of stripmined tile. More...
     
    typedef FragmentStream< TileTraits, TileLoadIterator< TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index >, TileStoreIterator< TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Index, Scalar, IteratorFragment::kScalar, DestinationSkew > > FragmentStream
     Define the tile stream. More...
     
    + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kUsage = Usage
     Indicates identity of multiplicand. More...
     
    static MatrixLayout::Kind const kLayout = Layout
     Layout of the operand. More...
     
    static int const kThreads = Threads
     Number of threads. More...
     
    static int const kAccessSize = ScalarsPerInst
     Scalars per instruction. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ DestinationSkew

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef ShapeDiv<DestinationSkew_, Shape<ScalarsPerInst, ScalarsPerInst, ScalarsPerInst, 1> >::Shape cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::DestinationSkew
    +
    + +
    +
    + +

    ◆ FragmentStream

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef FragmentStream< TileTraits, TileLoadIterator<TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH : IteratorAdvance::kW, MemorySpace::kGlobal, Index>, TileStoreIterator<TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH : IteratorAdvance::kW, MemorySpace::kShared, Index, Scalar, IteratorFragment::kScalar, DestinationSkew> > cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::FragmentStream
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Index_ cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::Index
    +
    + +
    +
    + +

    ◆ MultiplicandTraits

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef GemmMultiplicandTraits<ThreadBlockTile, kUsage, kLayout> cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::MultiplicandTraits
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Scalar_ cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ScalarTile

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef MultiplicandTraits::Shape cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::ScalarTile
    +
    + +
    +
    + +

    ◆ ThreadBlockTile

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef ThreadBlockTile_ cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::ThreadBlockTile
    +
    + +
    +
    + +

    ◆ TileTraits

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef TileTraitsDefault<VectorizedTile, kThreads> cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::TileTraits
    +
    + +
    +
    + +

    ◆ VectorizedTile

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef ReshapeTile<ScalarTile, kAccessSize>::Tile cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::VectorizedTile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::kAccessSize = ScalarsPerInst
    +
    +static
    +
    + +
    +
    + +

    ◆ kLayout

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::kLayout = Layout
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::kThreads = Threads
    +
    +static
    +
    + +
    +
    + +

    ◆ kUsage

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::kUsage = Usage
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params-members.html new file mode 100644 index 00000000..2d926367 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmFragmentStream< Traits_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmFragmentStream< Traits_ >::Params, including all inherited members.

    + + +
    initialize(GemmDesc_ const &desc, typename Traits::Scalar const *pointer, Index ldm)cutlass::gemm::GemmFragmentStream< Traits_ >::Paramsinline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.html new file mode 100644 index 00000000..3c3b6a4b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.html @@ -0,0 +1,161 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmFragmentStream< Traits_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmFragmentStream< Traits_ >::Params Struct Reference
    +
    +
    + +

    Parameters object. +

    + +

    #include <gemm_fragment_stream.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmFragmentStream< Traits_ >::Params:
    +
    +
    + + + +
    + + + + + + +

    +Public Member Functions

    template<typename GemmDesc_ >
    CUTLASS_HOST_DEVICE int initialize (GemmDesc_ const &desc, typename Traits::Scalar const *pointer, Index ldm)
     Initializes parameters. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Traits_ >
    +
    +template<typename GemmDesc_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GemmFragmentStream< Traits_ >::Params::initialize (GemmDesc_ const & desc,
    typename Traits::Scalar const * pointer,
    Index ldm 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.png new file mode 100644 index 00000000..ec9500ca Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb-members.html new file mode 100644 index 00000000..7e4746ea --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb-members.html @@ -0,0 +1,142 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    Base typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    BaseParams typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    data() constcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    Delta typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    Fragment typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    FragmentConstIterator typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    FragmentElement typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    FragmentIterator typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    FragmentShape typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    GemmGlobalIteratorAb(Params const &_params, const Coord< 3 > &bounds, const Coord< 3 > &block, ThreadOffset thread_offset_func=ThreadOffset())cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    ImmediateOffsetStrides typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    inc_advance()cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    inc_d()cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    inc_h()cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    inc_stage()cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    inc_w()cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    Index typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    initialize_predicates(const Coord< 3 > &bounds, const Coord< 3 > &block)cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >::initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    Iterations typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    kAccessSizecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >static
    kAdvancecutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >static
    kFragmentSizecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >static
    kIteratorFragmentcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >static
    kLayoutcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >static
    kMemorySpacecutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >static
    kRequiresLoadFence enum valuecutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    load(Fragment &fragment, PredicateIterator pred_it) constcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    load(Fragment &fragment) constcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    load_post_increment(Fragment &fragment, PredicateIterator pred_it)cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    load_post_increment(Fragment &fragment)cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    paramscutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    Pointer typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    predicatescutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    PredicateVector typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    residue(Index k)cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    Scalar typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    SharedStorage typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    Skew typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    stagecutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    Storage typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    This_ typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    thread_offsetcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    ThreadOffset typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    Threads typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    Tile typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    TileLoadIterator()cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    TileLoadIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    TileLoadIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    Traits typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    valid(int d, int h, int w, int c) constcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html new file mode 100644 index 00000000..4210572d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html @@ -0,0 +1,912 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >:
    +
    +
    + + +cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ > +cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > > + +
    + + + + +

    +Classes

    struct  Params
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalIteratorAb< TileTraits_, Index_ > This_
     This class. More...
     
    typedef TileLoadIterator< TileTraits_, typename TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ > Base
     The base class. More...
     
    typedef Base::Fragment Fragment
     Fragment type loaded by the iterator. More...
     
    typedef TileTraits_::Scalar Scalar
     The scalar. More...
     
    typedef TileTraits_::Threads Threads
     The threads. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef TileTraits_::ThreadOffset ThreadOffset
     The thread offset. More...
     
    typedef cutlass::PredicateVector< ShapeCount< typename Base::Iterations >::kCount > PredicateVector
     
    typedef Base::Params BaseParams
     Iterator parameters type. More...
     
    - Public Types inherited from cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    enum  
     Do we require a fence? More...
     
    typedef TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > > Base
     Base class. More...
     
    typedef Base::Traits Traits
     concept TileTraits More...
     
    typedef Base::Scalar Scalar
     Scalar element. More...
     
    typedef Base::FragmentElement FragmentElement
     Fragment element. More...
     
    typedef Base::Index Index
     Index type. More...
     
    typedef Base::Skew Skew
     Skew quantity. More...
     
    typedef Base::Tile Tile
     Tile shape. More...
     
    typedef Base::Delta Delta
     Delta. More...
     
    typedef Base::Iterations Iterations
     Iterations. More...
     
    typedef Base::ThreadOffset ThreadOffset
     ThreadOffset functor. More...
     
    typedef Base::FragmentShape FragmentShape
     Fragment type. More...
     
    typedef Base::AccessType AccessType
     Memory access type. More...
     
    typedef Base::Fragment Fragment
     Fragment definition. More...
     
    typedef Base::FragmentIterator FragmentIterator
     Fragment iterator definition. More...
     
    typedef Base::FragmentConstIterator FragmentConstIterator
     Fragment const iterator definition. More...
     
    typedef Base::PredicateVector PredicateVector
     Default predicate mask type. More...
     
    typedef Base::Storage SharedStorage
     Storage object that may be loaded from. More...
     
    typedef Base::Params BaseParams
     IteratorBase parameters. More...
     
    typedef Scalar const * Pointer
     The pointer type. More...
     
    - Public Types inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    typedef TileTraits_ Traits
     concept TileTraits More...
     
    typedef TileTraits_::Scalar Scalar
     Scalar element. More...
     
    typedef TileTraits_::Scalar FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Shape< 0, 0, 0, 0 > Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/store by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE void initialize_predicates (const Coord< 3 > &bounds, const Coord< 3 > &block)
     
    CUTLASS_DEVICE GemmGlobalIteratorAb (Params const &_params, const Coord< 3 > &bounds, const Coord< 3 > &block, ThreadOffset thread_offset_func=ThreadOffset())
     Ctor. More...
     
    CUTLASS_DEVICE void inc_h ()
     Increment the pointer in the H dimension. More...
     
    CUTLASS_DEVICE void inc_d ()
     Increment the pointer in the D dimension. More...
     
    CUTLASS_DEVICE void inc_advance ()
     Increment the pointer to move to the next iteration. More...
     
    CUTLASS_HOST_DEVICE Scalar const * data () const
     Returns the current pointer. More...
     
    CUTLASS_DEVICE void residue (Index k)
     That's the residue! Update the predicates. More...
     
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    - Public Member Functions inherited from cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    CUTLASS_HOST_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator ()
     Default constructor. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator (Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile load iterator. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator (Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile load iterator. More...
     
    CUTLASS_HOST_DEVICE Scalar const * data () const
     Returns the current pointer. More...
     
    CUTLASS_HOST_DEVICE void inc_d ()
     Increment in the D dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_h ()
     Increment in the H dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_w ()
     Increment in the W dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_advance ()
     Increment in the next dimension. More...
     
    CUTLASS_DEVICE void inc_stage ()
     Increment the stage. More...
     
    CUTLASS_HOST_DEVICE void load_post_increment (Fragment &fragment, PredicateIterator pred_it)
     Loads a fragment and advances the iterator to the next tile. More...
     
    CUTLASS_HOST_DEVICE void load_post_increment (Fragment &fragment)
     Loads a fragment and advances the iterator to the next tile. More...
     
    CUTLASS_HOST_DEVICE void load (Fragment &fragment, PredicateIterator pred_it) const
     Loads a fragment without advancing the iterator. More...
     
    CUTLASS_HOST_DEVICE void load (Fragment &fragment) const
     Loads a fragment without advancing the iterator. More...
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Coord< 4 > thread_offset
     Offset of an individual lane from the start of the tile. More...
     
    Params params
     The parameters. More...
     
    PredicateVector predicates
     The predicates. More...
     
    - Public Attributes inherited from cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    Params params
     Parameters structure. More...
     
    Coord< 4 > thread_offset
     Offset of an individual lane from the start of the tile. More...
     
    int stage
     Stage argument enables wrapping after some number of tiles have been loaded. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = TileTraits_::kLayout
     The layout. More...
     
    static IteratorAdvance::Kind const kAdvance = Base::kAdvance
     Specifies in which dimension post-increment accesses advance. More...
     
    - Static Public Attributes inherited from cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    static IteratorAdvance::Kind const kAdvance
     Specifies in which dimension post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment
     Specifies type of iterator fragment storage (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace
     Source or destination memory space. More...
     
    - Static Public Attributes inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    static IteratorAdvance::Kind const kAdvance
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment
     Specifies iterator storage fragment type (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    + + + + + +

    +Additional Inherited Members

    - Static Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<TileTraits_, typename TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH : IteratorAdvance::kW, MemorySpace::kGlobal, Index_> cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Base
    +
    + +
    +
    + +

    ◆ BaseParams

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Base::Params cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::BaseParams
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Base::Fragment cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Fragment
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Index_ cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Index
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef cutlass::PredicateVector<ShapeCount<typename Base::Iterations>::kCount> cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::PredicateVector
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Scalar cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Scalar
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorAb<TileTraits_, Index_> cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::This_
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::ThreadOffset cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Threads cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Threads
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GemmGlobalIteratorAb()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::GemmGlobalIteratorAb (Params const & _params,
    const Coord< 3 > & bounds,
    const Coord< 3 > & block,
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Scalar const* cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_advance()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::inc_advance ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_d()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::inc_d ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_h()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::inc_h ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::initialize_predicates (const Coord< 3 > & bounds,
    const Coord< 3 > & block 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ residue()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::residue (Index k)
    +
    +inline
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAdvance

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + +
    IteratorAdvance::Kind const cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::kAdvance = Base::kAdvance
    +
    +static
    +
    + +
    +
    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::kLayout = TileTraits_::kLayout
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Params cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::params
    +
    + +
    +
    + +

    ◆ predicates

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    PredicateVector cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::predicates
    +
    + +
    +
    + +

    ◆ thread_offset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Coord<4> cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::thread_offset
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.png new file mode 100644 index 00000000..f6dfb595 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params-members.html new file mode 100644 index 00000000..71243aed --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params-members.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params, including all inherited members.

    + + + + + + + + + + + + + + + + +
    inc_advancecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    initialize(Scalar const *ptr, Index stride_h)cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Paramsinline
    cutlass::TileLoadIterator::Params::initialize(SharedStorage const &storage)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileLoadIterator::Params::initialize()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    pointercutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html new file mode 100644 index 00000000..d4517b31 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html @@ -0,0 +1,193 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params Struct Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params:
    +
    +
    + + +cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params +cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params + +
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Scalar const *ptr, Index stride_h)
     Initializes params to load a strip-mined tile, given pointer and stride_h. More...
     
    - Public Member Functions inherited from cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    CUTLASS_HOST_DEVICE int initialize (SharedStorage const &storage)
     Initialize params to access storage object. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)
     Initializes params to access a raw pointer. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w)
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Attributes inherited from cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    Scalar const * pointer
     Pointer to memory. More...
     
    - Public Attributes inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    Index stride_d
     
    Index stride_h
     
    Index stride_w
     
    Index inc_d
     
    Index inc_h
     
    Index inc_w
     
    Index inc_advance
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params::initialize (Scalar const * ptr,
    Index stride_h 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.png new file mode 100644 index 00000000..0a3e71c3 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1SharedStorage.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1SharedStorage.html new file mode 100644 index 00000000..47e5bcc0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1SharedStorage.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::SharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::SharedStorage Struct Reference
    +
    +
    + +

    The shared memory storage needed by the iterator. +

    + +

    #include <gemm_global_tile.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd-members.html new file mode 100644 index 00000000..e77b99eb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd-members.html @@ -0,0 +1,131 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Base typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    data()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    data() constcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    Delta typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Fragment typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentConstIterator typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentElement typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentIterator typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentShape typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    GemmGlobalIteratorCd()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    GemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int offset=0, int pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    ImmediateOffsetStrides typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    inc_advance()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_c()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_d()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_h()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_w()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    Index typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >inlinestatic
    Iterations typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    kAccessSizecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kAdvancecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kFragmentSizecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kIteratorFragmentcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kLayoutcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >static
    kMemorySpacecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    paramscutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    Pointer typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    predicatescutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    PredicateVector typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Scalar typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    Skew typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Storage typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    This_ typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    thread_offsetcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    ThreadOffset typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    Threads typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    Tile typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Traits typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    valid(int d, int h, int w, int c) constcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html new file mode 100644 index 00000000..6af47320 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html @@ -0,0 +1,783 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >:
    +
    +
    + + +cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > + +
    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalIteratorCd< TileTraits_, Index_ > This_
     This class. More...
     
    typedef TileIteratorBase< TileTraits_, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > Base
     The base class. More...
     
    typedef TileTraits_::Scalar Scalar
     The scalar. More...
     
    typedef TileTraits_::Pointer Pointer
     The pointer. More...
     
    typedef TileTraits_::Threads Threads
     The threads. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef TileTraits_::ThreadOffset ThreadOffset
     The thread offset. More...
     
    - Public Types inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    typedef TileTraits_ Traits
     concept TileTraits More...
     
    typedef TileTraits_::Scalar Scalar
     Scalar element. More...
     
    typedef TileTraits_::Scalar FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Shape< 0, 0, 0, 0 > Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/store by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GemmGlobalIteratorCd ()
     Ctor. More...
     
    CUTLASS_DEVICE GemmGlobalIteratorCd (Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int offset=0, int pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())
     Ctor. More...
     
    CUTLASS_DEVICE void inc_c ()
     Increment the pointer in the C dimension. More...
     
    CUTLASS_DEVICE void inc_w ()
     Increment the pointer in the W dimension. More...
     
    CUTLASS_DEVICE void inc_h ()
     Increment the pointer in the H dimension. More...
     
    CUTLASS_DEVICE void inc_d ()
     Increment the pointer in the D dimension. More...
     
    CUTLASS_DEVICE void inc_advance ()
     Increment the pointer to move to the next iteration. More...
     
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Test the validity of the iterator. More...
     
    CUTLASS_HOST_DEVICE Pointer data ()
     Returns the raw pointer. More...
     
    CUTLASS_HOST_DEVICE Pointer const data () const
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + + + + + +

    +Public Attributes

    Params params
     
    Coord< 4 > thread_offset
     Offset of an individual lane from the start of the tile. More...
     
    cutlass::PredicateVector< Base::Iterations::kW > predicates
     The predicates for the row. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = TileTraits_::kLayout
     The layout. More...
     
    - Static Public Attributes inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    static IteratorAdvance::Kind const kAdvance
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment
     Specifies iterator storage fragment type (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    + + + + + +

    +Additional Inherited Members

    - Static Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileIteratorBase<TileTraits_, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_> cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Base
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Index_ cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Index
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Pointer cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Scalar cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Scalar
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorCd<TileTraits_, Index_> cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::This_
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::ThreadOffset cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Threads cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Threads
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GemmGlobalIteratorCd() [1/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::GemmGlobalIteratorCd ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ GemmGlobalIteratorCd() [2/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::GemmGlobalIteratorCd (Params const & params,
    const Coord< 3 > & bounds,
    const Coord< 3 > & block,
    int offset = 0,
    int pred_offset = 0,
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data() [1/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Pointer cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::data ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ data() [2/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Pointer const cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_advance()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::inc_advance ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_c()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::inc_c ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_d()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::inc_d ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_h()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::inc_h ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_w()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::inc_w ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::kLayout = TileTraits_::kLayout
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Params cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::params
    +
    + +
    +
    + +

    ◆ predicates

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    cutlass::PredicateVector<Base::Iterations::kW> cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::predicates
    +
    + +
    +
    + +

    ◆ thread_offset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Coord<4> cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::thread_offset
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.png new file mode 100644 index 00000000..13e8ac2a Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits-members.html new file mode 100644 index 00000000..ddee2bf2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits-members.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + + + +
    Base typedef cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Delta typedef cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Iterations typedef cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    kAccessSize cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kIsContiguous cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kLayout cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kMemorySpace cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kOperand cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kStrideH cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > static
    MultiplicandTraits typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedef cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    ThreadsStrides typedef cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Tile typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.html new file mode 100644 index 00000000..755f67e6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.html @@ -0,0 +1,280 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > Base
     The base class. More...
     
    typedef Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > Delta
     Override the strides in each dimension between different loads/stores. More...
     
    typedef Base::Iterations Iterations
     
    typedef Base::Threads Threads
     
    typedef Base::ThreadsStrides ThreadsStrides
     
    - Public Types inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsStrides
     The threads strides. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
     
    + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kStrideH = kStrideH_
     The stride in the H dimension. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    static GemmOperand::Kind const kOperand
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout
     The layout. More...
     
    static bool const kIsContiguous
     Is it A^N or B^T? More...
     
    static int const kAccessSize
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef GemmGlobalTileTraits<GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_> cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::Iterations cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Iterations
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::Threads cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::ThreadsStrides cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadsStrides
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kStrideH

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::kStrideH = kStrideH_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.png new file mode 100644 index 00000000..5f8b970e Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..9e253bc1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..171e1634 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params-members.html new file mode 100644 index 00000000..aa6c60c8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html new file mode 100644 index 00000000..9b30fd78 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html @@ -0,0 +1,298 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_global_tile.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Pointer pointer, Index ld, Index bound, Index epilogue_stride_w, Index epilogue_delta_w)
     Setup the params. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Pointer pointer
     The pointer. More...
     
    Index stride_h
     The stride in the H dimension to setup the thread in the block. More...
     
    Index inc_advance
     The strides to increment the pointer. More...
     
    Index inc_h
     
    Index predicate_inc_advance
     The strides to increment the predicate offset. More...
     
    Index predicate_inc_h
     
    Index predicate_offset
     The column offset to compute the predicate for the columns. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::initialize (Pointer pointer,
    Index ld,
    Index bound,
    Index epilogue_stride_w,
    Index epilogue_delta_w 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ inc_advance

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::inc_advance
    +
    + +
    +
    + +

    ◆ inc_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::inc_h
    +
    + +
    +
    + +

    ◆ pointer

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Pointer cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::pointer
    +
    + +
    +
    + +

    ◆ predicate_inc_advance

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_inc_advance
    +
    + +
    +
    + +

    ◆ predicate_inc_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_inc_h
    +
    + +
    +
    + +

    ◆ predicate_offset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_offset
    +
    + +
    +
    + +

    ◆ stride_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::stride_h
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1SharedStorage.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1SharedStorage.html new file mode 100644 index 00000000..c390004d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1SharedStorage.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::SharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::SharedStorage Struct Reference
    +
    +
    + +

    The shared memory storage needed by the iterator. +

    + +

    #include <gemm_global_tile.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits-members.html new file mode 100644 index 00000000..92fd6a4c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits-members.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + + + +
    Base typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Delta typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    ImmediateOffsetStrides typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Iterations typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    kAccessSize cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kLayout cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kMemorySpace cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kOperand cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kStrideH cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > static
    MultiplicandTraits typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    ThreadsDelta typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Tile typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html new file mode 100644 index 00000000..3aed66b5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html @@ -0,0 +1,298 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > Base
     The base class. More...
     
    typedef Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > Delta
     Override the strides in each dimension between different loads/stores. More...
     
    typedef Base::Iterations Iterations
     
    typedef Base::Threads Threads
     
    typedef Base::ThreadsDelta ThreadsDelta
     
    typedef Base::ImmediateOffsetStrides ImmediateOffsetStrides
     
    - Public Types inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile shape. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads shape. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsDelta
     The relative offset between two elements in the H/W dimension in adjacent threads. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSize > ImmediateOffsetStrides
     Strides for immediate offset computation. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
     
    + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kStrideH = kStrideH_
     The stride in the H dimension. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    static GemmOperand::Kind const kOperand
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout
     The layout. More...
     
    static int const kAccessSize
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef GemmGlobalTileTraits<GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_> cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::ImmediateOffsetStrides cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::Iterations cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Iterations
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::Threads cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsDelta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::ThreadsDelta cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadsDelta
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kStrideH

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::kStrideH = kStrideH_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.png new file mode 100644 index 00000000..e3cb008b Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..1510bcf2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..524a06aa --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits-members.html new file mode 100644 index 00000000..6225081e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits-members.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + +
    Delta typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ImmediateOffsetStrides typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Iterations typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    kAccessSize cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kLayout cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kMemorySpace cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kOperand cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    MultiplicandTraits typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ThreadsDelta typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Tile typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html new file mode 100644 index 00000000..4e61285e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html @@ -0,0 +1,400 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > +cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile shape. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads shape. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsDelta
     The relative offset between two elements in the H/W dimension in adjacent threads. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSize > ImmediateOffsetStrides
     Strides for immediate offset computation. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
     
    + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kOperand = kOperand_
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout = kLayout_
     The layout. More...
     
    static int const kAccessSize = kAccessSize_
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kGlobal
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef Shape<0, Threads::kH, Threads::kW * kAccessSize> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef Shape<0, 0, Threads::kW * ThreadsDelta::kW, kAccessSize> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kAccessSize> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Iterations
    +
    + +
    +
    + +

    ◆ MultiplicandTraits

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef GemmMultiplicandTraits<Tile, kOperand, kLayout> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::MultiplicandTraits
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Scalar
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef ReshapeThreads<Tile, Threads_>::Threads cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsDelta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef Shape<1, 1, Tile::kC> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadsDelta
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef ReshapeTile<Tile_, kAccessSize_>::Tile cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::kAccessSize = kAccessSize_
    +
    +static
    +
    + +
    +
    + +

    ◆ kLayout

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::kLayout = kLayout_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::kMemorySpace = MemorySpace::kGlobal
    +
    +static
    +
    + +
    +
    + +

    ◆ kOperand

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::kOperand = kOperand_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.png new file mode 100644 index 00000000..4c9bada4 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..634804a6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..7b47addb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits-members.html new file mode 100644 index 00000000..db9bc1bc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html new file mode 100644 index 00000000..121fe8cc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html @@ -0,0 +1,228 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout > Struct Template Reference
    +
    +
    + +

    #include <gemm_operand.h>

    + + + + + + + + +

    +Public Types

    typedef ThreadBlockTile_ ThreadBlockTile
     Shape of GEMM thread block tile (K, N, M) More...
     
    typedef platform::conditional< kKstrided, Shape< 1, ThreadBlockTile::kD, GetExtent< Usage, ThreadBlockTile >::kExtent >, Shape< 1, GetExtent< Usage, ThreadBlockTile >::kExtent, ThreadBlockTile::kD > >::type Shape
     Map the ThreadBlockShape onto (kH, kW) dimensions for A and B operand. More...
     
    + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kUsage = Usage
     Identifies multiplicand. More...
     
    static MatrixLayout::Kind const kLayout = Layout
     Layout of tile. More...
     
    static bool const kKstrided = (kUsage == GemmOperand::kA ^ kLayout == MatrixLayout::kRowMajor)
     
    +

    Detailed Description

    +

    template<typename ThreadBlockTile_, GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    +struct cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >

    + +

    Determines the shape of a multiplicand tile in terms of strided (H) and contiguous (W) dimensions

    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename ThreadBlockTile_ , GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    + + + + +
    typedef platform::conditional< kKstrided, Shape<1, ThreadBlockTile::kD, GetExtent<Usage, ThreadBlockTile>::kExtent>, Shape<1, GetExtent<Usage, ThreadBlockTile>::kExtent, ThreadBlockTile::kD> >::type cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >::Shape
    +
    + +
    +
    + +

    ◆ ThreadBlockTile

    + +
    +
    +
    +template<typename ThreadBlockTile_ , GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    + + + + +
    typedef ThreadBlockTile_ cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >::ThreadBlockTile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kKstrided

    + +
    +
    +
    +template<typename ThreadBlockTile_ , GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    + + + + + +
    + + + + +
    bool const cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >::kKstrided = (kUsage == GemmOperand::kA ^ kLayout == MatrixLayout::kRowMajor)
    +
    +static
    +
    + +
    +
    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename ThreadBlockTile_ , GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >::kLayout = Layout
    +
    +static
    +
    + +
    +
    + +

    ◆ kUsage

    + +
    +
    +
    +template<typename ThreadBlockTile_ , GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >::kUsage = Usage
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb-members.html new file mode 100644 index 00000000..03950b5a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ >, including all inherited members.

    + + +
    Congruous cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ > static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html new file mode 100644 index 00000000..39721d95 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ > Struct Template Reference
    +
    +
    + +

    Helper to describe attributes of GEMM matrix operands. +

    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Attributes

    static const bool Congruous
     
    +

    Member Data Documentation

    + +

    ◆ Congruous

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_>
    + + + + + +
    + + + + +
    const bool cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ >::Congruous
    +
    +static
    +
    +Initial value:
    =
    (kOperand_ == GemmOperand::kA ^ kLayout_ == MatrixLayout::kRowMajor)
    +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits-members.html new file mode 100644 index 00000000..5e175f55 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + +
    Delta typedef cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Iterations typedef cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    kMemorySpace cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    kOperand cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    kScalarsPerLds cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    kSkew cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    kThreadsPerWarp cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    kWarps cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    Pointer typedef cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Scalar typedef cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedef cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Tile typedef cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew typedef cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew_ typedef cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithSkew typedef cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Warps typedef cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits.html new file mode 100644 index 00000000..8851f0a9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits.html @@ -0,0 +1,463 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
     The tile without skew. More...
     
    typedef Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
     The tile with skew. More...
     
    typedef ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
     The tile without skew after reshaping. More...
     
    typedef ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
     The tile. More...
     
    typedef Warps_ Warps
     The number of warps. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in a warp. More...
     
    typedef Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarp > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kScalarsPerLds, 0 > Delta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kOperand = GemmOperand::kA
     
    static int const kScalarsPerLds = kScalarsPerLds_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kWarps = GetExtent<kOperand, Warps>::kExtent
     The number of warps. More...
     
    static int const kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
     The number of threads in one dimension of the warp. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kScalarsPerLds, 0> cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp > cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithSkew, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithoutSkew_, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    + +

    ◆ TileWithoutSkew_

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, OutputTile_::kD / InstructionShape_::kD, GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD> cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew_
    +
    + +
    +
    + +

    ◆ TileWithSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW + kSkew_> cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithSkew
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kOperand

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kOperand = GemmOperand::kA
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLds

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kScalarsPerLds = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kWarps = GetExtent<kOperand, Warps>::kExtent
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..7d022958 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset.html new file mode 100644 index 00000000..03c381f8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits-members.html new file mode 100644 index 00000000..e964f6ec --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + +
    Delta typedef cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Iterations typedef cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    kMemorySpace cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    kOperand cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    kScalarsPerLds cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    kSkew cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    kThreadsPerWarp cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    kWarps cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > static
    Pointer typedef cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Scalar typedef cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedef cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Tile typedef cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew typedef cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew_ typedef cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithSkew typedef cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Warps typedef cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits.html new file mode 100644 index 00000000..a075d3ca --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits.html @@ -0,0 +1,463 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
     The tile without skew. More...
     
    typedef Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
     The tile with skew. More...
     
    typedef ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
     The tile without skew after reshaping. More...
     
    typedef ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
     The tile. More...
     
    typedef Warps_ Warps
     The number of warps. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in a warp. More...
     
    typedef Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarp > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kScalarsPerLds, 0 > Delta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kOperand = GemmOperand::kB
     
    static int const kScalarsPerLds = kScalarsPerLds_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kWarps = GetExtent<kOperand, Warps>::kExtent
     The number of warps. More...
     
    static int const kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
     The number of threads in one dimension of the warp. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kScalarsPerLds, 0> cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp > cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithSkew, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithoutSkew_, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    + +

    ◆ TileWithoutSkew_

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, OutputTile_::kD / InstructionShape_::kD, GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD> cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew_
    +
    + +
    +
    + +

    ◆ TileWithSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW + kSkew_> cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithSkew
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kOperand

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kOperand = GemmOperand::kB
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLds

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kScalarsPerLds = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kWarps = GetExtent<kOperand, Warps>::kExtent
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..ed251e53 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..0814240e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits-members.html new file mode 100644 index 00000000..0629b209 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits-members.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + +
    Delta typedef cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Iterations typedef cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    kIterationsD cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > static
    kIterationsH cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > static
    kIterationsInHPerWarp cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > static
    kMemorySpace cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > static
    kScalarsPerLds cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > static
    kScalarsPerRow cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > static
    kScalarsPerThread cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > static
    kSkew cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > static
    kThreads cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > static
    OutputTile typedef cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Pointer typedef cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Scalar typedef cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedef cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Tile typedef cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Warps typedef cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits.html new file mode 100644 index 00000000..1cf22fdf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits.html @@ -0,0 +1,504 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef OutputTile_ OutputTile
     The dimension of the output tile. More...
     
    typedef Warps_ Warps
     The warps in the tile. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in the warps. More...
     
    typedef Shape< 1, 2, kScalarsPerRow/kScalarsPerLds, kScalarsPerLds > Tile
     The tile. More...
     
    typedef Shape< kIterationsD, kIterationsH, OutputTile::kW/kWarpSize/kScalarsPerLds > Iterations
     The number of iterations needed to store the tile. More...
     
    typedef Shape< OutputTile::kW, kScalarsPerRow, kWarpSize *kScalarsPerLds > Delta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kScalarsPerLds = kScalarsPerLds_
     The number of scalars per LDS (shared-memory load). More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
     The number of scalars per thread. More...
     
    static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
     The number of threads. More...
     
    static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
     The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). More...
     
    static int const kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount
     
    static int const kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2
     
    static int const kIterationsD = kIterationsInHPerWarp / kIterationsH
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<OutputTile::kW, kScalarsPerRow, kWarpSize * kScalarsPerLds> cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kIterationsD, kIterationsH, OutputTile::kW / kWarpSize / kScalarsPerLds> cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 2, kScalarsPerRow / kScalarsPerLds, kScalarsPerLds> cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kIterationsD

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsD = kIterationsInHPerWarp / kIterationsH
    +
    +static
    +
    + +
    +
    + +

    ◆ kIterationsH

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2
    +
    +static
    +
    + +
    +
    + +

    ◆ kIterationsInHPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLds

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerLds = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerRow

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerThread

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..af32d154 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..f055988f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits-members.html new file mode 100644 index 00000000..a317e544 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits-members.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    kAccessSizecutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kMemorySpacecutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kOperandcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kThreadsPerWarpcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kWarpscutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    Pointer typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Tile typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew_ typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithSkew typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Warps typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html new file mode 100644 index 00000000..27c32f35 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html @@ -0,0 +1,482 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
     The tile without skew. More...
     
    typedef Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
     The tile with skew. More...
     
    typedef ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
     The tile without skew after reshaping. More...
     
    typedef ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
     The tile. More...
     
    typedef Warps_ Warps
     The number of warps. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in a warp. More...
     
    typedef Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarp > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kOperand = GemmOperand::kA
     
    static int const kAccessSize = kScalarsPerLds_
     The number of scalars per LDS (shared-memory load). More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kWarps = GetExtent<kOperand, Warps>::kExtent
     The number of warps. More...
     
    static int const kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
     The number of threads in one dimension of the warp. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kAccessSize, 0> cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kAccessSize, 0> cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp > cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithSkew, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithoutSkew_, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    + +

    ◆ TileWithoutSkew_

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, OutputTile_::kD / InstructionShape_::kD, GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD> cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew_
    +
    + +
    +
    + +

    ◆ TileWithSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW + kSkew_> cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithSkew
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kAccessSize = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kOperand

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kOperand = GemmOperand::kA
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kWarps = GetExtent<kOperand, Warps>::kExtent
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..3e308db6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html new file mode 100644 index 00000000..0731bce8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits-members.html new file mode 100644 index 00000000..782aa841 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits-members.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    kAccessSizecutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kMemorySpacecutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kOperandcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kThreadsPerWarpcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kWarpscutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    Pointer typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Tile typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew_ typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithSkew typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Warps typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html new file mode 100644 index 00000000..097ce43e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html @@ -0,0 +1,482 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
     The tile without skew. More...
     
    typedef Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
     The tile with skew. More...
     
    typedef ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
     The tile without skew after reshaping. More...
     
    typedef ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
     The tile. More...
     
    typedef Warps_ Warps
     The number of warps. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in a warp. More...
     
    typedef Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarp > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kOperand = GemmOperand::kB
     
    static int const kAccessSize = kScalarsPerLds_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kWarps = GetExtent<kOperand, Warps>::kExtent
     The number of warps. More...
     
    static int const kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
     The number of threads in one dimension of the warp. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kAccessSize, 0> cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kAccessSize, 0> cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp > cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithSkew, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithoutSkew_, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    + +

    ◆ TileWithoutSkew_

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, OutputTile_::kD / InstructionShape_::kD, GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD> cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew_
    +
    + +
    +
    + +

    ◆ TileWithSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW + kSkew_> cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithSkew
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kAccessSize = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kOperand

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kOperand = GemmOperand::kB
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kWarps = GetExtent<kOperand, Warps>::kExtent
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..387441df --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..07f462ac --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits-members.html new file mode 100644 index 00000000..afc22fe8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits-members.html @@ -0,0 +1,108 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    kAccessSizecutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kIterationsDcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kIterationsHcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kIterationsInHPerWarpcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kMemorySpacecutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kScalarsPerRowcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kScalarsPerThreadcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kThreadscutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    OutputTile typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Pointer typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Tile typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Warps typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html new file mode 100644 index 00000000..043d8c3a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html @@ -0,0 +1,523 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef OutputTile_ OutputTile
     The dimension of the output tile. More...
     
    typedef Warps_ Warps
     The warps in the tile. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in the warps. More...
     
    typedef Shape< 1, 2, kScalarsPerRow/kAccessSize, kAccessSize > Tile
     The tile. More...
     
    typedef Shape< kIterationsD, kIterationsH, OutputTile::kW/kWarpSize/kAccessSize > Iterations
     The number of iterations needed to store the tile. More...
     
    typedef Shape< OutputTile::kW, kScalarsPerRow, kWarpSize *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< OutputTile::kW, kScalarsPerRow, kWarpSize *kAccessSize > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kAccessSize = kScalarsPerLds_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
     The number of scalars per thread. More...
     
    static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
     The number of threads. More...
     
    static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
     The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). More...
     
    static int const kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount
     
    static int const kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2
     
    static int const kIterationsD = kIterationsInHPerWarp / kIterationsH
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<OutputTile::kW, kScalarsPerRow, kWarpSize * kAccessSize> cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<OutputTile::kW, kScalarsPerRow, kWarpSize * kAccessSize> cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kIterationsD, kIterationsH, OutputTile::kW / kWarpSize / kAccessSize> cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 2, kScalarsPerRow / kAccessSize, kAccessSize> cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kAccessSize = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kIterationsD

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsD = kIterationsInHPerWarp / kIterationsH
    +
    +static
    +
    + +
    +
    + +

    ◆ kIterationsH

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2
    +
    +static
    +
    + +
    +
    + +

    ◆ kIterationsInHPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerRow

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerThread

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..1a9ffe26 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..d68dda08 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits-members.html new file mode 100644 index 00000000..f757540d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits-members.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits.html new file mode 100644 index 00000000..ca8c11ac --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits.html @@ -0,0 +1,325 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Classes

    struct  ThreadOffset
     
    + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kScalarsPerSts_ >::Tile Tile
     The tile. More...
     
    typedef Threads_ Threads
     The threads. More...
     
    typedef Shape< 0, ShapeCount< Tile >::kWc, Tile::kC, kScalarsPerSts_ > ThreadsStrides
     The strides to compute the base position of the thread. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/Threads::kC/kScalarsPerSts > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< 0, Threads::kH *ShapeCount< Tile >::kWc, Threads::kW *kScalarsPerSts > Delta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + +

    +Static Public Attributes

    static int const kSkew = 0
     The skew. More...
     
    static int const kScalarsPerSts = kScalarsPerSts_
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / Threads::kC / kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Scalar
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Threads_ cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<0, ShapeCount<Tile>::kWc, Tile::kC, kScalarsPerSts_> cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadsStrides
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef ReshapeTile<Tile_, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerSts

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kScalarsPerSts = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kSkew = 0
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..60774aa5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..d79437c0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Struct Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits-members.html new file mode 100644 index 00000000..7e7090f2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + +
    Delta typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Iterations typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    kMemorySpace cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kScalarsPerRow cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kScalarsPerSts cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kScalarsPerThread cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kSkew cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kThreads cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    OutputTile typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Pointer typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Scalar typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    ThreadsPerWarp typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Tile typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Warps typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits.html new file mode 100644 index 00000000..27f54141 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits.html @@ -0,0 +1,426 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef OutputTile_ OutputTile
     The dimension of the output tile. More...
     
    typedef Warps_ Warps
     The warps in the tile. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in the warps. More...
     
    typedef Shape< 1, 2, kScalarsPerRow/kScalarsPerSts, kScalarsPerSts > Tile
     The tile. More...
     
    typedef Shape< 1, 1, kScalarsPerThread/kScalarsPerSts > Iterations
     The number of iterations needed to store the tile. More...
     
    typedef Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kScalarsPerSts > Delta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kScalarsPerSts = kScalarsPerSts_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
     The number of scalars per thread. More...
     
    static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
     The number of threads. More...
     
    static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
     The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, kScalarsPerThread / kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 2, kScalarsPerRow / kScalarsPerSts, kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerRow

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerSts

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerSts = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerThread

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..f143cd65 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..d9a6a9a3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits-members.html new file mode 100644 index 00000000..eb2702a3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits-members.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html new file mode 100644 index 00000000..f755f52f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html @@ -0,0 +1,344 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Classes

    struct  ThreadOffset
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kScalarsPerSts_ >::Tile Tile
     The tile. More...
     
    typedef Threads_ Threads
     The threads. More...
     
    typedef Shape< 0, ShapeCount< Tile >::kWc, Tile::kC, kScalarsPerSts_ > ThreadsStrides
     The strides to compute the base position of the thread. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/Threads::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< 0, Threads::kH *ShapeCount< Tile >::kWc, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, Threads::kH *ShapeCount< Tile >::kWc, Threads::kW *kAccessSize > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + +

    +Static Public Attributes

    static int const kSkew = 0
     The skew. More...
     
    static int const kAccessSize = kScalarsPerSts_
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kAccessSize> cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kAccessSize> cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / Threads::kC / kAccessSize> cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Scalar
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Threads_ cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<0, ShapeCount<Tile>::kWc, Tile::kC, kScalarsPerSts_> cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadsStrides
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef ReshapeTile<Tile_, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kAccessSize = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kSkew = 0
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..6157a4d3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..876eea66 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Struct Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits-members.html new file mode 100644 index 00000000..5749940c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits-members.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + +
    Delta typedef cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    ImmediateOffsetStrides typedef cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Iterations typedef cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    kAccessSize cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kMemorySpace cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kScalarsPerRow cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kScalarsPerThread cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kSkew cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kThreads cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    OutputTile typedef cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Pointer typedef cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Scalar typedef cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    ThreadsPerWarp typedef cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Tile typedef cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Warps typedef cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html new file mode 100644 index 00000000..a5e0b8d6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html @@ -0,0 +1,445 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef OutputTile_ OutputTile
     The dimension of the output tile. More...
     
    typedef Warps_ Warps
     The warps in the tile. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in the warps. More...
     
    typedef Shape< 1, 2, kScalarsPerRow/kAccessSize, kAccessSize > Tile
     The tile. More...
     
    typedef Shape< 1, 1, kScalarsPerThread/kAccessSize > Iterations
     The number of iterations needed to store the tile. More...
     
    typedef Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kAccessSize > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kAccessSize = kScalarsPerSts_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
     The number of scalars per thread. More...
     
    static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
     The number of threads. More...
     
    static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
     The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, kScalarsPerThread / kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 2, kScalarsPerRow / kAccessSize, kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kAccessSize = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerRow

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerThread

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..8a28c519 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..673f9afb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits-members.html new file mode 100644 index 00000000..65870e11 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits-members.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + +
    Delta typedef cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Iterations typedef cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    kMemorySpace cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > static
    kScalarsPerSts cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > static
    kSkew cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > static
    Pointer typedef cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Scalar typedef cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Threads typedef cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    ThreadsStrides typedef cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Tile typedef cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    TileWithoutSkew typedef cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits.html new file mode 100644 index 00000000..bd68edc4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits.html @@ -0,0 +1,344 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Classes

    struct  ThreadOffset
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kScalarsPerSts_ >::Tile TileWithoutSkew
     The tile without skews. More...
     
    typedef ReshapeTile< Shape< Tile_::kD, Tile_::kH, Tile_::kW+kSkew_ >, kScalarsPerSts_ >::Tile Tile
     The tile. More...
     
    typedef Threads_ Threads
     The threads. More...
     
    typedef Shape< 0, kScalarsPerSts_, ShapeCount< Tile >::kHwc/Threads::kW > ThreadsStrides
     The strides to compute the base position of the thread. More...
     
    typedef Shape< 1, TileWithoutSkew::kH/Threads::kW, TileWithoutSkew::kW/Threads::kH > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< 0, ShapeCount< Tile >::kWc, Threads::kH *kScalarsPerSts > Delta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + +

    +Static Public Attributes

    static int const kSkew = kSkew_
     The skew. More...
     
    static int const kScalarsPerSts = kScalarsPerSts_
     The number of scalars per STS. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<0, ShapeCount<Tile>::kWc, Threads::kH * kScalarsPerSts> cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<1, TileWithoutSkew::kH / Threads::kW, TileWithoutSkew::kW / Threads::kH> cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Threads_ cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<0, kScalarsPerSts_, ShapeCount<Tile>::kHwc / Threads::kW> cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadsStrides
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef ReshapeTile<Shape<Tile_::kD, Tile_::kH, Tile_::kW + kSkew_>, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef ReshapeTile<Tile_, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerSts

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kScalarsPerSts = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..1eda0c86 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..5cd71d1b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits-members.html new file mode 100644 index 00000000..0a64b450 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits-members.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + +
    Delta typedef cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    ImmediateOffsetStrides typedef cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Iterations typedef cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    kAccessSize cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > static
    kMemorySpace cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > static
    kSkew cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > static
    Pointer typedef cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Scalar typedef cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Threads typedef cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    ThreadsStrides typedef cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > protected
    Tile typedef cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    TileWithoutSkew typedef cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html new file mode 100644 index 00000000..ed1fb90b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html @@ -0,0 +1,375 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Classes

    struct  ThreadOffset
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kScalarsPerSts_ >::Tile TileWithoutSkew
     The tile without skews. More...
     
    typedef ReshapeTile< Shape< Tile_::kD, Tile_::kH, Tile_::kW+kSkew_ >, kScalarsPerSts_ >::Tile Tile
     The tile. More...
     
    typedef Threads_ Threads
     The threads. More...
     
    typedef Shape< 1, TileWithoutSkew::kH/Threads::kW, TileWithoutSkew::kW/Threads::kH > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< 0, ShapeCount< Tile >::kWc, Threads::kH *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, ShapeCount< Tile >::kWc, Threads::kH *kAccessSize > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + +

    +Static Public Attributes

    static int const kSkew = kSkew_
     The skew. More...
     
    static int const kAccessSize = kScalarsPerSts_
     The number of scalars per STS. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    + + + + +

    +Protected Types

    typedef Shape< 0, kScalarsPerSts_, ShapeCount< Tile >::kHwc/Threads::kW > ThreadsStrides
     The strides to compute the base position of the thread. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<0, ShapeCount<Tile>::kWc, Threads::kH * kAccessSize> cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<0, ShapeCount<Tile>::kWc, Threads::kH * kAccessSize> cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<1, TileWithoutSkew::kH / Threads::kW, TileWithoutSkew::kW / Threads::kH> cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Threads_ cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    typedef Shape<0, kScalarsPerSts_, ShapeCount<Tile>::kHwc / Threads::kW> cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadsStrides
    +
    +protected
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef ReshapeTile<Shape<Tile_::kD, Tile_::kH, Tile_::kW + kSkew_>, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef ReshapeTile<Tile_, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kAccessSize = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..1298ee52 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..cc55e56e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html new file mode 100644 index 00000000..8c1ffaf9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 00000000..9bca290f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html new file mode 100644 index 00000000..7affa6ef --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,236 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmConfig_::ScalarA Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^N. More...
     
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^N. More...
     
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, 0 > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for A^N. More...
     
    + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor
     The layout. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, Scalar const, Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, GemmConfig_::kScalarsPerLdgA> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ MultiplyAddScalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::MultiplyAdd::ScalarA cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::ScalarA cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, 0> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout = MatrixLayout::kColumnMajor
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png new file mode 100644 index 00000000..9ce259eb Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 00000000..09585beb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html new file mode 100644 index 00000000..809d799b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,263 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmConfig_::ScalarA Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^T. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA, 128/sizeof(MultiplyAddScalar)/GemmConfig_::kScalarsPerStsA/GlobalTileTraits::Threads::kW *kScalarsIn4B > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^T. More...
     
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for A^T. More...
     
    + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor
     The layout. More...
     
    static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
     The number of scalars in 4B. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, Scalar const, Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>, Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, GemmConfig_::kScalarsPerLdgA> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ MultiplyAddScalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::MultiplyAdd::ScalarA cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::ScalarA cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, SharedStoreTileTraits::kSkew> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA, 128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsA / GlobalTileTraits::Threads::kW * kScalarsIn4B> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout = MatrixLayout::kRowMajor
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsIn4B

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png new file mode 100644 index 00000000..556e57b5 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html new file mode 100644 index 00000000..60b2921a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 00000000..be5a0a9b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html new file mode 100644 index 00000000..fc90114c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,263 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmConfig_::ScalarB Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^N. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB, 128/sizeof(MultiplyAddScalar)/GemmConfig_::kScalarsPerStsB/GlobalTileTraits::Threads::kW *kScalarsIn4B > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^N. More...
     
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for B^N. More...
     
    + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor
     The layout. More...
     
    static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
     The number of scalars in 4B. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, Scalar const, Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>, Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, GemmConfig_::kScalarsPerLdgB> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ MultiplyAddScalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::MultiplyAdd::ScalarB cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::ScalarB cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, SharedStoreTileTraits::kSkew> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB, 128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsB / GlobalTileTraits::Threads::kW * kScalarsIn4B> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout = MatrixLayout::kColumnMajor
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsIn4B

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png new file mode 100644 index 00000000..4dd45b73 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 00000000..04d0fed0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html new file mode 100644 index 00000000..d2976060 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,236 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmConfig_::ScalarB Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^T. More...
     
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^T. More...
     
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, 0 > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for B^T. More...
     
    + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor
     The layout. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, Scalar const, Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, GemmConfig_::kScalarsPerLdgB> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ MultiplyAddScalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::MultiplyAdd::ScalarB cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::ScalarB cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, 0> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout = MatrixLayout::kRowMajor
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png new file mode 100644 index 00000000..f291cad7 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits-members.html new file mode 100644 index 00000000..05cab061 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits-members.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    Epilogue typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    GemmConfig typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    Index typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    kLayoutAcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >static
    kLayoutBcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    OutputTile typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    ScalarA typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    ScalarB typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    ScalarC typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    ScalarD typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    shared_load_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >inlinestatic
    shared_store_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >inlinestatic
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits.html new file mode 100644 index 00000000..7153c823 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits.html @@ -0,0 +1,568 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  GlobalLoadStream
     Assemble the global load streams for A/B. More...
     
    struct  MainLoopSharedStorage
     
    struct  Params
     The params. More...
     
    struct  SharedLoadStream
     Assemble the shared load stream for A/B. More...
     
    union  SharedStorage
     The storage in shared memory. More...
     
    union  StreamSharedStorage
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmConfig_ GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef GlobalLoadStreamA_ GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef GlobalLoadStreamA_::Scalar ScalarA
     The scalar for A. More...
     
    typedef GlobalLoadStreamB_ GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef GlobalLoadStreamB_::Scalar ScalarB
     The scalar for B. More...
     
    typedef SharedLoadStreamA_ SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef SharedLoadStreamB_ SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef Epilogue_ Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef BlockSwizzle_ BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef ClearAccumulators_ ClearAccumulators
     Clear the accumulators. More...
     
    + + + + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayoutA = GlobalLoadStreamA::kLayout
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB = GlobalLoadStreamB::kLayout
     The layout of B. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ BlockSwizzle

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef BlockSwizzle_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::BlockSwizzle
    +
    + +
    +
    + +

    ◆ ClearAccumulators

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef ClearAccumulators_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ClearAccumulators
    +
    + +
    +
    + +

    ◆ Epilogue

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef Epilogue_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Epilogue
    +
    + +
    +
    + +

    ◆ GemmConfig

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GemmConfig_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GemmConfig
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamA

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamA_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreamA
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamB

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamB_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreamB
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef Index_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Index
    +
    + +
    +
    + +

    ◆ MultiplyAdd

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GemmConfig::MultiplyAdd cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MultiplyAdd
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GemmConfig::OutputTile cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::OutputTile
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamA_::Scalar cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamB_::Scalar cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef Epilogue::ScalarC cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ScalarD

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef Epilogue::ScalarD cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ScalarD
    +
    + +
    +
    + +

    ◆ SharedLoadStreamA

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef SharedLoadStreamA_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreamA
    +
    + +
    +
    + +

    ◆ SharedLoadStreamB

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef SharedLoadStreamB_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreamB
    +
    + +
    +
    + +

    ◆ SharedStoreStorageA

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamA::SharedStoreStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStoreStorageA
    +
    + +
    +
    + +

    ◆ SharedStoreStorageB

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamB::SharedStoreStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStoreStorageB
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ shared_load_fence()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::shared_load_fence (bool in_loop)
    +
    +inlinestatic
    +
    + +
    +
    + +

    ◆ shared_store_fence()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::shared_store_fence (bool in_loop)
    +
    +inlinestatic
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayoutA

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::kLayoutA = GlobalLoadStreamA::kLayout
    +
    +static
    +
    + +
    +
    + +

    ◆ kLayoutB

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::kLayoutB = GlobalLoadStreamB::kLayout
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream-members.html new file mode 100644 index 00000000..5f89b801 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream, including all inherited members.

    + + + + + + + +
    commit()cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreaminline
    copy()cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreaminline
    GlobalLoadStream(Params const &params, SharedStorage &shared_storage, dim3 const &block)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreaminline
    residue(Index k, bool skip_clear=false)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreaminline
    stream_acutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream
    stream_bcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html new file mode 100644 index 00000000..6343fabc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html @@ -0,0 +1,295 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream Struct Reference
    +
    +
    + +

    Assemble the global load streams for A/B. +

    + +

    #include <gemm_traits.h>

    + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GlobalLoadStream (Params const &params, SharedStorage &shared_storage, dim3 const &block)
     Ctor. More...
     
    CUTLASS_DEVICE void copy ()
     Trigger the copies from shared memory to registers. More...
     
    CUTLASS_DEVICE void commit ()
     Commit the data. More...
     
    CUTLASS_DEVICE void residue (Index k, bool skip_clear=false)
     Execute the residue code. More...
     
    + + + + + + + +

    +Public Attributes

    GlobalLoadStreamA stream_a
     The stream for A. More...
     
    GlobalLoadStreamB stream_b
     The stream for B. More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ GlobalLoadStream()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::GlobalLoadStream (Params const & params,
    SharedStorage & shared_storage,
    dim3 const & block 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::copy ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ residue()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::residue (Index k,
    bool skip_clear = false 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ stream_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    GlobalLoadStreamA cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::stream_a
    +
    + +
    +
    + +

    ◆ stream_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    GlobalLoadStreamB cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::stream_b
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage-members.html new file mode 100644 index 00000000..5f362203 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html new file mode 100644 index 00000000..95f9a829 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html @@ -0,0 +1,154 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage Struct Reference
    +
    +
    + +

    #include <gemm_traits.h>

    + + + + + + + + +

    +Public Attributes

    StreamSharedStorage< GlobalLoadStreamA, SharedLoadStreamA > stream_a
     
    StreamSharedStorage< GlobalLoadStreamB, SharedLoadStreamB > stream_b
     
    ClearAccumulators::SharedStorage clear
     
    +

    Member Data Documentation

    + +

    ◆ clear

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    ClearAccumulators::SharedStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage::clear
    +
    + +
    +
    + +

    ◆ stream_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    StreamSharedStorage<GlobalLoadStreamA, SharedLoadStreamA> cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage::stream_a
    +
    + +
    +
    + +

    ◆ stream_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    StreamSharedStorage<GlobalLoadStreamB, SharedLoadStreamB> cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage::stream_b
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params-members.html new file mode 100644 index 00000000..05de1ce4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params-members.html @@ -0,0 +1,99 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params, including all inherited members.

    + + + + + + + + + + +
    epilogue cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    global_stream_a cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    global_stream_b cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    initialize(GemmDesc_ const &desc) cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params inline
    k cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    m cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    n cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    shared_stream_a cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    shared_stream_b cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html new file mode 100644 index 00000000..ffeb872b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html @@ -0,0 +1,292 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_traits.h>

    + + + + + + +

    +Public Member Functions

    template<typename GemmDesc_ >
    CUTLASS_HOST_DEVICE int initialize (GemmDesc_ const &desc)
     Initialize the parameters. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Index m
     The dimensions of the GEMM. More...
     
    Index n
     
    Index k
     
    GlobalLoadStreamA::Params global_stream_a
     The params for the A stream. More...
     
    GlobalLoadStreamB::Params global_stream_b
     The params for the B stream. More...
     
    SharedLoadStreamA::Params shared_stream_a
     The params for the A stream from shared memory. More...
     
    SharedLoadStreamB::Params shared_stream_b
     The params for the B stream from shared memory. More...
     
    Epilogue::Params epilogue
     The params for the epilogue. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    +
    +template<typename GemmDesc_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::initialize (GemmDesc_ const & desc)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ epilogue

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    Epilogue::Params cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::epilogue
    +
    + +
    +
    + +

    ◆ global_stream_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    GlobalLoadStreamA::Params cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::global_stream_a
    +
    + +
    +
    + +

    ◆ global_stream_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    GlobalLoadStreamB::Params cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::global_stream_b
    +
    + +
    +
    + +

    ◆ k

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    Index cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::k
    +
    + +
    +
    + +

    ◆ m

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    Index cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::m
    +
    + +
    +
    + +

    ◆ n

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    Index cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::n
    +
    + +
    +
    + +

    ◆ shared_stream_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamA::Params cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::shared_stream_a
    +
    + +
    +
    + +

    ◆ shared_stream_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamB::Params cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::shared_stream_b
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream-members.html new file mode 100644 index 00000000..64969b35 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream-members.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream, including all inherited members.

    + + + + + + + + + + + + + +
    commit(int step) cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream inline
    copy(int step) cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream inline
    fetched_a cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    fetched_b cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    fragment_a(int step) const cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream inline
    fragment_b(int step) const cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream inline
    inc_stage() cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream inline
    SharedLoadStream(Params const &params, SharedStorage &shared_storage) cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream inline
    stream_a cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    stream_b cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    transformed_a cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    transformed_b cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html new file mode 100644 index 00000000..7735b012 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html @@ -0,0 +1,418 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream Struct Reference
    +
    +
    + +

    Assemble the shared load stream for A/B. +

    + +

    #include <gemm_traits.h>

    + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE SharedLoadStream (Params const &params, SharedStorage &shared_storage)
     Ctor. More...
     
    CUTLASS_DEVICE void copy (int step)
     Trigger the copies from shared memory to registers. More...
     
    CUTLASS_DEVICE void commit (int step)
     Commit the data. More...
     
    CUTLASS_DEVICE SharedLoadStreamA::Fragment const & fragment_a (int step) const
     The fragment A. More...
     
    CUTLASS_DEVICE SharedLoadStreamB::Fragment const & fragment_b (int step) const
     The fragment B. More...
     
    CUTLASS_DEVICE void inc_stage ()
     Increment the stage. More...
     
    + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    SharedLoadStreamA stream_a
     The stream for A. More...
     
    SharedLoadStreamA::FetchedFragment fetched_a [2]
     The fragments to fetch A. More...
     
    SharedLoadStreamA::TransformedFragment transformed_a [2]
     The fragments to transform A. More...
     
    SharedLoadStreamB stream_b
     The stream for B. More...
     
    SharedLoadStreamB::FetchedFragment fetched_b [2]
     The fragments to fetch B. More...
     
    SharedLoadStreamB::TransformedFragment transformed_b [2]
     The fragments to transform B. More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ SharedLoadStream()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::SharedLoadStream (Params const & params,
    SharedStorage & shared_storage 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::commit (int step)
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::copy (int step)
    +
    +inline
    +
    + +
    +
    + +

    ◆ fragment_a()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE SharedLoadStreamA::Fragment const& cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::fragment_a (int step) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ fragment_b()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE SharedLoadStreamB::Fragment const& cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::fragment_b (int step) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_stage()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::inc_stage ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ fetched_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamA::FetchedFragment cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::fetched_a[2]
    +
    + +
    +
    + +

    ◆ fetched_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamB::FetchedFragment cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::fetched_b[2]
    +
    + +
    +
    + +

    ◆ stream_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamA cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::stream_a
    +
    + +
    +
    + +

    ◆ stream_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamB cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::stream_b
    +
    + +
    +
    + +

    ◆ transformed_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamA::TransformedFragment cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::transformed_a[2]
    +
    + +
    +
    + +

    ◆ transformed_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamB::TransformedFragment cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::transformed_b[2]
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params-members.html new file mode 100644 index 00000000..28de78f1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::Gemm< GemmTraits_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::Gemm< GemmTraits_ >::Params, including all inherited members.

    + + +
    initialize(Index m, Index n, Index k, ScalarEpilogue alpha, ScalarA const *d_a, Index lda, ScalarB const *d_b, Index ldb, ScalarEpilogue beta, ScalarC const *d_c, Index ldc, ScalarD *d_d, Index ldd) cutlass::gemm::Gemm< GemmTraits_ >::Params inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.html new file mode 100644 index 00000000..63d08ff9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.html @@ -0,0 +1,217 @@ + + + + + + + +Cutlass: cutlass::gemm::Gemm< GemmTraits_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::Gemm< GemmTraits_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm.h>

    +
    +Inheritance diagram for cutlass::gemm::Gemm< GemmTraits_ >::Params:
    +
    +
    + + + +
    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Index m, Index n, Index k, ScalarEpilogue alpha, ScalarA const *d_a, Index lda, ScalarB const *d_b, Index ldb, ScalarEpilogue beta, ScalarC const *d_c, Index ldc, ScalarD *d_d, Index ldd)
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::Gemm< GemmTraits_ >::Params::initialize (Index m,
    Index n,
    Index k,
    ScalarEpilogue alpha,
    ScalarA const * d_a,
    Index lda,
    ScalarB const * d_b,
    Index ldb,
    ScalarEpilogue beta,
    ScalarC const * d_c,
    Index ldc,
    ScalarD * d_d,
    Index ldd 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.png b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.png new file mode 100644 index 00000000..6e8ee648 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent.html b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent.html new file mode 100644 index 00000000..c955db65 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::GetExtent< kOperand_, Tile_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GetExtent< kOperand_, Tile_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_operand.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4-members.html new file mode 100644 index 00000000..725806f6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >, including all inherited members.

    + + +
    kExtent cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ > static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html new file mode 100644 index 00000000..4e613ddc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Attributes

    static const int kExtent = Tile_::kW
     
    +

    Member Data Documentation

    + +

    ◆ kExtent

    + +
    +
    +
    +template<typename Tile_ >
    + + + + + +
    + + + + +
    const int cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >::kExtent = Tile_::kW
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4-members.html new file mode 100644 index 00000000..d17a7e7c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >, including all inherited members.

    + + +
    kExtent cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ > static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html new file mode 100644 index 00000000..172db999 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Attributes

    static const int kExtent = Tile_::kH
     
    +

    Member Data Documentation

    + +

    ◆ kExtent

    + +
    +
    +
    +template<typename Tile_ >
    + + + + + +
    + + + + +
    const int cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >::kExtent = Tile_::kH
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream-members.html new file mode 100644 index 00000000..f3b227eb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream-members.html @@ -0,0 +1,112 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + +
    Base typedef cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >
    commit() cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > inline
    copy() cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > inline
    fetched_fragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    FetchedFragment typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Fragment typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    GlobalLoadStream(typename Base::Params const &params, typename Base::SharedStorage &shared_storage, Coord< 3 > const &bounds, Coord< 3 > const &block) cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ > inline
    GlobalLoadStreamBase(Params const &params, SharedStorage &shared_storage, Coord< 3 > const bounds, Coord< 3 > const &block) cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > inline
    Index typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    kLayout cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > static
    load_iterator cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    LoadIterator typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Pointer typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    residue(Index k, bool skip_clear=false) cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > inline
    Scalar typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    SharedStoreStorage typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    store_iterator cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    StoreIterator typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    transformed_fragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    TransformedFragment typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Transformer typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    transformer cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.html new file mode 100644 index 00000000..99cbcad5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.html @@ -0,0 +1,253 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_stream.h>

    +
    +Inheritance diagram for cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >:
    +
    +
    + + +cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > Base
     The base class. More...
     
    - Public Types inherited from cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    typedef LoadIterator_ LoadIterator
     The load iterator. More...
     
    typedef Transformer_ Transformer
     The transformer. More...
     
    typedef StoreIterator_ StoreIterator
     The store iterator to write to shared memory. More...
     
    typedef LoadIterator::Fragment FetchedFragment
     The fragment that is copied from shared memory. More...
     
    typedef Transformer::OutputFragment TransformedFragment
     The fragment that is obtained after the transformation by the transformer. More...
     
    typedef TransformedFragment Fragment
     Make sure the fragments match. More...
     
    typedef LoadIterator::Scalar Scalar
     The scalar type of the iterator. More...
     
    typedef LoadIterator::Pointer Pointer
     The pointer. More...
     
    typedef LoadIterator::Index Index
     The index. More...
     
    typedef StoreIterator::SharedStorage SharedStoreStorage
     The amount of storage in shared memory needed to store the tile. More...
     
    + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GlobalLoadStream (typename Base::Params const &params, typename Base::SharedStorage &shared_storage, Coord< 3 > const &bounds, Coord< 3 > const &block)
     Ctor. More...
     
    - Public Member Functions inherited from cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    CUTLASS_DEVICE GlobalLoadStreamBase (Params const &params, SharedStorage &shared_storage, Coord< 3 > const bounds, Coord< 3 > const &block)
     Ctor. More...
     
    CUTLASS_DEVICE void copy ()
     Load the data from shared memory to the fetch fragment. More...
     
    CUTLASS_DEVICE void commit ()
     Commit the data. More...
     
    CUTLASS_DEVICE void residue (Index k, bool skip_clear=false)
     Execute the residue code. More...
     
    + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Attributes inherited from cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    LoadIterator load_iterator
     The iterator. More...
     
    FetchedFragment fetched_fragment
     The fragment to fetch from shared memory. More...
     
    Transformer transformer
     The transformer. More...
     
    TransformedFragment transformed_fragment
     The fragment to convert the data after it has been fetched from shared memory. More...
     
    StoreIterator store_iterator
     The store iterator. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    static MatrixLayout::Kind const kLayout = LoadIterator::kLayout
     Make sure the transformed fragment is the same as the store fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ = Copy<typename LoadIterator_::Fragment>>
    + + + + +
    typedef GlobalLoadStreamBase<LoadIterator_, StoreIterator_, Transformer_> cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >::Base
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GlobalLoadStream()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ = Copy<typename LoadIterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >::GlobalLoadStream (typename Base::Params const & params,
    typename Base::SharedStorage & shared_storage,
    Coord< 3 > const & bounds,
    Coord< 3 > const & block 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.png b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.png new file mode 100644 index 00000000..4959cca9 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase-members.html new file mode 100644 index 00000000..372751d8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase-members.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + +
    commit() cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > inline
    copy() cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > inline
    fetched_fragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    FetchedFragment typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Fragment typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    GlobalLoadStreamBase(Params const &params, SharedStorage &shared_storage, Coord< 3 > const bounds, Coord< 3 > const &block) cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > inline
    Index typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    kLayout cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > static
    load_iterator cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    LoadIterator typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Pointer typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    residue(Index k, bool skip_clear=false) cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > inline
    Scalar typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    SharedStoreStorage typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    store_iterator cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    StoreIterator typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    transformed_fragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    TransformedFragment typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Transformer typedef cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    transformer cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html new file mode 100644 index 00000000..8741f1ba --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html @@ -0,0 +1,602 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_stream.h>

    +
    +Inheritance diagram for cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >:
    +
    +
    + + +cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ > + +
    + + + + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    union  SharedStorage
     The storage in shared memory needed by that stream. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef LoadIterator_ LoadIterator
     The load iterator. More...
     
    typedef Transformer_ Transformer
     The transformer. More...
     
    typedef StoreIterator_ StoreIterator
     The store iterator to write to shared memory. More...
     
    typedef LoadIterator::Fragment FetchedFragment
     The fragment that is copied from shared memory. More...
     
    typedef Transformer::OutputFragment TransformedFragment
     The fragment that is obtained after the transformation by the transformer. More...
     
    typedef TransformedFragment Fragment
     Make sure the fragments match. More...
     
    typedef LoadIterator::Scalar Scalar
     The scalar type of the iterator. More...
     
    typedef LoadIterator::Pointer Pointer
     The pointer. More...
     
    typedef LoadIterator::Index Index
     The index. More...
     
    typedef StoreIterator::SharedStorage SharedStoreStorage
     The amount of storage in shared memory needed to store the tile. More...
     
    + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GlobalLoadStreamBase (Params const &params, SharedStorage &shared_storage, Coord< 3 > const bounds, Coord< 3 > const &block)
     Ctor. More...
     
    CUTLASS_DEVICE void copy ()
     Load the data from shared memory to the fetch fragment. More...
     
    CUTLASS_DEVICE void commit ()
     Commit the data. More...
     
    CUTLASS_DEVICE void residue (Index k, bool skip_clear=false)
     Execute the residue code. More...
     
    + + + + + + + + + + + + + + + + +

    +Public Attributes

    LoadIterator load_iterator
     The iterator. More...
     
    FetchedFragment fetched_fragment
     The fragment to fetch from shared memory. More...
     
    Transformer transformer
     The transformer. More...
     
    TransformedFragment transformed_fragment
     The fragment to convert the data after it has been fetched from shared memory. More...
     
    StoreIterator store_iterator
     The store iterator. More...
     
    + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = LoadIterator::kLayout
     Make sure the transformed fragment is the same as the store fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ FetchedFragment

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef LoadIterator::Fragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::FetchedFragment
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef TransformedFragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Fragment
    +
    +

    The output fragment.

    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef LoadIterator::Index cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Index
    +
    + +
    +
    + +

    ◆ LoadIterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef LoadIterator_ cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::LoadIterator
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef LoadIterator::Pointer cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef LoadIterator::Scalar cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedStoreStorage

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef StoreIterator::SharedStorage cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStoreStorage
    +
    + +
    +
    + +

    ◆ StoreIterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef StoreIterator_ cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::StoreIterator
    +
    + +
    +
    + +

    ◆ TransformedFragment

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef Transformer::OutputFragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::TransformedFragment
    +
    + +
    +
    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef Transformer_ cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Transformer
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GlobalLoadStreamBase()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::GlobalLoadStreamBase (Params const & params,
    SharedStorage & shared_storage,
    Coord< 3 > const bounds,
    Coord< 3 > const & block 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::copy ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ residue()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::residue (Index k,
    bool skip_clear = false 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ fetched_fragment

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    FetchedFragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::fetched_fragment
    +
    + +
    +
    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::kLayout = LoadIterator::kLayout
    +
    +static
    +
    +

    The layout.

    + +
    +
    + +

    ◆ load_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    LoadIterator cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::load_iterator
    +
    + +
    +
    + +

    ◆ store_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    StoreIterator cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::store_iterator
    +
    + +
    +
    + +

    ◆ transformed_fragment

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    TransformedFragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::transformed_fragment
    +
    + +
    +
    + +

    ◆ transformer

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    Transformer cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.png b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.png new file mode 100644 index 00000000..65687089 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params-members.html new file mode 100644 index 00000000..f2125003 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html new file mode 100644 index 00000000..31c214b2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html @@ -0,0 +1,185 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_global_stream.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Pointer pointer, Index ld)
     Setup the params. More...
     
    + + + + + +

    +Public Attributes

    LoadIterator::Params load_iterator
     
    StoreIterator::Params store_iterator
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params::initialize (Pointer pointer,
    Index ld 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ load_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    LoadIterator::Params cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params::load_iterator
    +
    + +
    +
    + +

    ◆ store_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    StoreIterator::Params cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params::store_iterator
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream-members.html new file mode 100644 index 00000000..1c760282 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream-members.html @@ -0,0 +1,108 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + +
    commit()cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >inline
    copy()cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >inline
    fragment()cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >inline
    Fragment typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    GlobalStoreStream(Params const &params, SharedStorage &shared_storage, Index m, Index n, Index k, Coord< 3 > const &block)cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >inline
    inc(Index predicate_inc, Index pointer_inc)cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >inline
    Index typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    input_fragmentcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    InputFragment typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    iteratorcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    Iterator typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    Pointer typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    Scalar typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    SharedStorage typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    transformed_fragmentcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    TransformedFragment typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    transformercutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    Transformer typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream.html new file mode 100644 index 00000000..3913ca1d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream.html @@ -0,0 +1,562 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_stream.h>

    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Iterator_ Iterator
     The store iterator. More...
     
    typedef Transformer_ Transformer
     The transformer. More...
     
    typedef Transformer::InputFragment InputFragment
     The input fragment. More...
     
    typedef Transformer::OutputFragment TransformedFragment
     The fragment that is obtained after the transformation by the transformer. More...
     
    typedef InputFragment Fragment
     Make sure the fragments match. More...
     
    typedef Iterator::Scalar Scalar
     The scalar type of the iterator. More...
     
    typedef Iterator::Pointer Pointer
     The pointer. More...
     
    typedef Iterator::Index Index
     The index. More...
     
    typedef Iterator::SharedStorage SharedStorage
     The storage in shared memory needed by that stream. More...
     
    + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GlobalStoreStream (Params const &params, SharedStorage &shared_storage, Index m, Index n, Index k, Coord< 3 > const &block)
     Ctor. More...
     
    CUTLASS_DEVICE void copy ()
     Trigger the copy from the fragment to shared memory. More...
     
    CUTLASS_DEVICE void commit ()
     Commit the data. More...
     
    CUTLASS_DEVICE void inc (Index predicate_inc, Index pointer_inc)
     Increment the iterator. More...
     
    CUTLASS_DEVICE Fragment & fragment ()
     The fragment. More...
     
    + + + + + + + + + + + + + +

    +Public Attributes

    Iterator iterator
     The iterator. More...
     
    InputFragment input_fragment
     The input fragment. More...
     
    Transformer transformer
     The transformer. More...
     
    TransformedFragment transformed_fragment
     The fragment containing the transformed data before the copy into shared memory. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef InputFragment cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Fragment
    +
    +

    The input fragment.

    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Index cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Index
    +
    + +
    +
    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer::InputFragment cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::InputFragment
    +
    + +
    +
    + +

    ◆ Iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator_ cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Iterator
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Pointer cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Scalar cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::SharedStorage cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ TransformedFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer::OutputFragment cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::TransformedFragment
    +
    + +
    +
    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer_ cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Transformer
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GlobalStoreStream()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::GlobalStoreStream (Params const & params,
    SharedStorage & shared_storage,
    Index m,
    Index n,
    Index k,
    Coord< 3 > const & block 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::copy ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ fragment()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE Fragment& cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::fragment ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::inc (Index predicate_inc,
    Index pointer_inc 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ input_fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    InputFragment cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::input_fragment
    +
    + +
    +
    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::iterator
    +
    + +
    +
    + +

    ◆ transformed_fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    TransformedFragment cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::transformed_fragment
    +
    + +
    +
    + +

    ◆ transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Transformer cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params-members.html new file mode 100644 index 00000000..242c1fb8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params.html new file mode 100644 index 00000000..d859deef --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params.html @@ -0,0 +1,168 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_global_stream.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Pointer pointer, Index ld)
     Setup the params. More...
     
    + + + + +

    +Public Attributes

    Iterator::Params iterator
     The iterator params. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Params::initialize (Pointer pointer,
    Index ld 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator::Params cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Params::iterator
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig-members.html new file mode 100644 index 00000000..07ccbe6d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    InstructionShape typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kStagescutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kThreadscutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kWarpSizecutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    OutputTile typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    ScalarA typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    ScalarB typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    ScalarC typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    ScalarD typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    Warps typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.html new file mode 100644 index 00000000..b5879328 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.html @@ -0,0 +1,177 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >:
    +
    +
    + + +cutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    typedef half ScalarA
     The scalar for A. More...
     
    typedef half ScalarB
     The scalar for B. More...
     
    typedef half ScalarC
     The scalar for C. More...
     
    typedef half ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half > MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    static int const kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads
     The number of threads. More...
     
    static int const kScalarsPerLdgA
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA
     
    static int const kScalarsPerLdsA
     
    static int const kScalarsPerLdgB
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB
     
    static int const kScalarsPerLdsB
     
    static int const kScalarsPerLdgC
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD
     
    static int const kScalarsPerLdsD
     
    static int const kAccumulatorsPerLdsA
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB
     
    static int const kStages
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.png new file mode 100644 index 00000000..cf90457b Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits-members.html new file mode 100644 index 00000000..f1a365e8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + + +
    Base typedef cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Delta typedef cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ImmediateOffsetStrides typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Iterations typedef cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    kAccessSize cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kLayout cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kMemorySpace cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kOperand cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    MultiplicandTraits typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedef cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ThreadsDelta typedef cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Tile typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html new file mode 100644 index 00000000..eca01a3c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html @@ -0,0 +1,254 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Base
     The base class. More...
     
    typedef Base::Threads Threads
     The threads. More...
     
    typedef Shape< 1, 2, Base::Tile::kC > ThreadsDelta
     The threads strides. More...
     
    typedef Shape< Base::Threads::kH *2, 1, Base::Threads::kW, Base::kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< Base::Tile::kH/Base::Threads::kH/2, 2, Base::Tile::kW/Base::Threads::kW, Base::Tile::kC/Base::kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    - Public Types inherited from cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile shape. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads shape. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsDelta
     The relative offset between two elements in the H/W dimension in adjacent threads. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSize > ImmediateOffsetStrides
     Strides for immediate offset computation. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
     
    + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Static Public Attributes inherited from cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    static GemmOperand::Kind const kOperand = kOperand_
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout = kLayout_
     The layout. More...
     
    static int const kAccessSize = kAccessSize_
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kGlobal
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef GemmGlobalTileTraits<kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_> cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<Base::Threads::kH * 2, 1, Base::Threads::kW, Base::kAccessSize> cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<Base::Tile::kH / Base::Threads::kH / 2, 2, Base::Tile::kW / Base::Threads::kW, Base::Tile::kC / Base::kAccessSize> cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Iterations
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Base::Threads cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsDelta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<1, 2, Base::Tile::kC> cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadsDelta
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.png new file mode 100644 index 00000000..5af8a710 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..2fa9bb6b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..b9fb35ed --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <hgemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle-members.html new file mode 100644 index 00000000..1abd2b90 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmSwizzle< GlobalIterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle.html new file mode 100644 index 00000000..495f1445 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle.html @@ -0,0 +1,273 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmSwizzle< GlobalIterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmSwizzle< GlobalIterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_swizzle.h>

    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GlobalIterator_ GlobalIterator
     The global iterator. More...
     
    typedef GlobalIterator::Fragment Fragment
     The source fragment. More...
     
    typedef GlobalIterator::FragmentShape FragmentShape
     The shape of the source fragment. More...
     
    typedef Fragment InputFragment
     The input fragment. More...
     
    typedef Fragment OutputFragment
     The output fragment. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE HgemmSwizzle ()
     The src/dst must be half fragments. More...
     
    CUTLASS_DEVICE void transform (Fragment const &src, Fragment &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator::Fragment cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator::FragmentShape cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ GlobalIterator

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator_ cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::GlobalIterator
    +
    + +
    +
    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef Fragment cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef Fragment cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ HgemmSwizzle()

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::HgemmSwizzle ()
    +
    +inline
    +
    +

    The number of elements must be a multiple of 2. Ctor.

    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform()

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::transform (Fragment const & src,
    Fragmentdst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.html new file mode 100644 index 00000000..6d21685c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTileTraitsHelperA< kLayout_, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTileTraitsHelperA< kLayout_, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmTileTraitsHelperA< kLayout_, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperA< kLayout_, GemmConfig_ > + +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.png new file mode 100644 index 00000000..9d906983 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 00000000..85433935 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html new file mode 100644 index 00000000..9e370edb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,211 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Base
     The base config. More...
     
    typedef HgemmCrosswiseGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, half const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^T. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< half, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, 2, 128/sizeof(half)/GlobalTileTraits::Threads::kW/2 > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^T. More...
     
    typedef GemmSharedLoadTileATraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for A^T. More...
     
    - Public Types inherited from cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
    typedef GemmConfig_::ScalarA Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^T. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA, 128/sizeof(MultiplyAddScalar)/GemmConfig_::kScalarsPerStsA/GlobalTileTraits::Threads::kW *kScalarsIn4B > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^T. More...
     
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for A^T. More...
     
    + + + + + + + + +

    +Additional Inherited Members

    - Static Public Attributes inherited from cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
    static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor
     The layout. More...
     
    static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
     The number of scalars in 4B. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_> cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Base
    +
    + +
    +
    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef HgemmCrosswiseGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, half const, Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>, Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, GemmConfig_::kScalarsPerLdgA> cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileATraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew> cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreWithSkewTileAbTraits< half, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, 2, 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2> cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png new file mode 100644 index 00000000..389d73d1 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.html new file mode 100644 index 00000000..b331b74b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTileTraitsHelperB< kLayout_, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTileTraitsHelperB< kLayout_, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmTileTraitsHelperB< kLayout_, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperB< kLayout_, GemmConfig_ > + +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.png new file mode 100644 index 00000000..87ca082a Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 00000000..273311c2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html new file mode 100644 index 00000000..8a59bc28 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,211 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Base
     The base config. More...
     
    typedef HgemmCrosswiseGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, half const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^N. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< half, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, 2, 128/sizeof(half)/GlobalTileTraits::Threads::kW/2 > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^N. More...
     
    typedef GemmSharedLoadTileBTraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for B^N. More...
     
    - Public Types inherited from cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
    typedef GemmConfig_::ScalarB Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^N. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB, 128/sizeof(MultiplyAddScalar)/GemmConfig_::kScalarsPerStsB/GlobalTileTraits::Threads::kW *kScalarsIn4B > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^N. More...
     
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for B^N. More...
     
    + + + + + + + + +

    +Additional Inherited Members

    - Static Public Attributes inherited from cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
    static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor
     The layout. More...
     
    static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
     The number of scalars in 4B. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_> cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Base
    +
    + +
    +
    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef HgemmCrosswiseGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, half const, Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>, Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, GemmConfig_::kScalarsPerLdgB> cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileBTraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew> cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreWithSkewTileAbTraits< half, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, 2, 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2> cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png new file mode 100644 index 00000000..c90cbb13 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits-members.html new file mode 100644 index 00000000..2a51feb2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits-members.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    Epilogue typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GemmConfig typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    Index typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    kLayoutA cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators > static
    kLayoutB cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators > static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    OutputTile typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarC typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarD typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    shared_load_fence(bool in_loop) cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators > inline static
    shared_store_fence(bool in_loop) cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators > inline static
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.html new file mode 100644 index 00000000..a9e4c269 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.html @@ -0,0 +1,172 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ >:
    +
    +
    + + +cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    typedef Helper_::GemmConfig GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef Helper_::GlobalLoadStreamA GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamA ::Scalar ScalarA
     The scalar for A. More...
     
    typedef Helper_::GlobalLoadStreamB GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamB ::Scalar ScalarB
     The scalar for B. More...
     
    typedef Helper_::SharedLoadStreamA SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef Helper_::SharedLoadStreamB SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef Helper_::Epilogue Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef IdentityBlockSwizzle BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef Helper_::ClearAccumulators ClearAccumulators
     Clear the accumulators. More...
     
    - Static Public Member Functions inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    static MatrixLayout::Kind const kLayoutA
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB
     The layout of B. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.png new file mode 100644 index 00000000..03fc4145 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper-members.html new file mode 100644 index 00000000..78f07825 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper-members.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + +
    ClearAccumulators typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    Epilogue typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GemmConfig typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GemmEpilogueTraits typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GemmTileTraitsHelperA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GemmTileTraitsHelperB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalLoadIteratorA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalLoadIteratorB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalLoadStreamA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalLoadStreamB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalTransformerA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalTransformerB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    MultiplyAdd typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedLoadIteratorA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedLoadIteratorB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedLoadStreamA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedLoadStreamB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedStoreIteratorA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedStoreIteratorB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper.html new file mode 100644 index 00000000..1ec8904e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper.html @@ -0,0 +1,460 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > GemmConfig
     The HGEMM config. More...
     
    typedef HgemmTileTraitsHelperA< kLayoutA_, GemmConfig > GemmTileTraitsHelperA
     The GEMM config for A. More...
     
    typedef HgemmTileTraitsHelperB< kLayoutB_, GemmConfig > GemmTileTraitsHelperB
     The GEMM config for B. More...
     
    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
     The iterator to load A from global memory. More...
     
    typedef HgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
     The default transformer for A. More...
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
     The iterator to store A to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
     The iterator to load B from global memory. More...
     
    typedef HgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
     The iterator to store B to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
     The iterator to load A from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorA > SharedLoadStreamA
     The stream to load A from shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
     The iterator to load B from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorB > SharedLoadStreamB
     The stream to load B from shared memory. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The functor to do the multiply-add in the main loop. More...
     
    typedef ClearAccumulators< typename MultiplyAdd::ScalarC > ClearAccumulators
     The object to clear accumulators. More...
     
    typedef SimplifiedGemmEpilogueTraits< GemmConfig, EpilogueFunctor_, Index_ > GemmEpilogueTraits
     The traits class for the epilogue. More...
     
    typedef GemmEpilogue< GemmEpilogueTraits > Epilogue
     The epilogue. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ ClearAccumulators

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef ClearAccumulators<typename MultiplyAdd::ScalarC> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::ClearAccumulators
    +
    + +
    +
    + +

    ◆ Epilogue

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GemmEpilogue<GemmEpilogueTraits> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::Epilogue
    +
    + +
    +
    + +

    ◆ GemmConfig

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef HgemmConfig<OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmConfig
    +
    + +
    +
    + +

    ◆ GemmEpilogueTraits

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef SimplifiedGemmEpilogueTraits<GemmConfig, EpilogueFunctor_, Index_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmEpilogueTraits
    +
    + +
    +
    + +

    ◆ GemmTileTraitsHelperA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef HgemmTileTraitsHelperA<kLayoutA_, GemmConfig> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmTileTraitsHelperA
    +
    + +
    +
    + +

    ◆ GemmTileTraitsHelperB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef HgemmTileTraitsHelperB<kLayoutB_, GemmConfig> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmTileTraitsHelperB
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperA::GlobalTileTraits, Index_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadIteratorA
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperB::GlobalTileTraits, Index_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadIteratorB
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadStreamA
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadStreamB
    +
    + +
    +
    + +

    ◆ GlobalTransformerA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef HgemmTransformerA<GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA>::Transformer cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalTransformerA
    +
    + +
    +
    + +

    ◆ GlobalTransformerB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef HgemmTransformerB<GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB>::Transformer cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalTransformerB
    +
    + +
    +
    + +

    ◆ MultiplyAdd

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GemmConfig::MultiplyAdd cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::MultiplyAdd
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadIteratorA
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadIteratorB
    +
    + +
    +
    + +

    ◆ SharedLoadStreamA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorA> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadStreamA
    +
    + +
    +
    + +

    ◆ SharedLoadStreamB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorB> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadStreamB
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedStoreIteratorA
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedStoreIteratorB
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA.html new file mode 100644 index 00000000..505f08a5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerA< kLayout_, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerA< kLayout_, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html new file mode 100644 index 00000000..7846aeeb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html new file mode 100644 index 00000000..092d948b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    + + + + +

    +Public Types

    typedef Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef Convert<typename Iterator_::Fragment, typename Iterator_::Fragment> cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html new file mode 100644 index 00000000..4877f001 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html new file mode 100644 index 00000000..8837b699 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    + + + + +

    +Public Types

    typedef HgemmSwizzle< Iterator_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef HgemmSwizzle<Iterator_> cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB.html new file mode 100644 index 00000000..99325439 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerB< kLayout_, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerB< kLayout_, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html new file mode 100644 index 00000000..fdfc6a00 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html new file mode 100644 index 00000000..93727e27 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    + + + + +

    +Public Types

    typedef HgemmSwizzle< Iterator_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef HgemmSwizzle<Iterator_> cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html new file mode 100644 index 00000000..e655326e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html new file mode 100644 index 00000000..013566c2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    + + + + +

    +Public Types

    typedef Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef Convert<typename Iterator_::Fragment, typename Iterator_::Fragment> cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle-members.html new file mode 100644 index 00000000..748a3a49 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IdentityBlockSwizzle Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IdentityBlockSwizzle, including all inherited members.

    + + + +
    IdentityBlockSwizzle()cutlass::gemm::IdentityBlockSwizzleinline
    swizzle()cutlass::gemm::IdentityBlockSwizzleinline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html b/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html new file mode 100644 index 00000000..68a70c7b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html @@ -0,0 +1,157 @@ + + + + + + + +Cutlass: cutlass::gemm::IdentityBlockSwizzle Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IdentityBlockSwizzle Struct Reference
    +
    +
    + +

    #include <identity_block_swizzle.h>

    + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IdentityBlockSwizzle ()
     Ctor. More...
     
    CUTLASS_DEVICE dim3 swizzle ()
     Swizzle the block index. More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ IdentityBlockSwizzle()

    + +
    +
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IdentityBlockSwizzle::IdentityBlockSwizzle ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ swizzle()

    + +
    +
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE dim3 cutlass::gemm::IdentityBlockSwizzle::swizzle ()
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig-members.html new file mode 100644 index 00000000..bb78c951 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    InstructionShape typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kStagescutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kThreadscutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kWarpSizecutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    OutputTile typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    ScalarA typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    ScalarB typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    ScalarC typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    ScalarD typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    Warps typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.html new file mode 100644 index 00000000..cbcfef45 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.html @@ -0,0 +1,177 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ >:
    +
    +
    + + +cutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    typedef int8_t ScalarA
     The scalar for A. More...
     
    typedef int8_t ScalarB
     The scalar for B. More...
     
    typedef ScalarD_ ScalarC
     The scalar for C. More...
     
    typedef ScalarD_ ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int > MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    static int const kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads
     The number of threads. More...
     
    static int const kScalarsPerLdgA
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA
     
    static int const kScalarsPerLdsA
     
    static int const kScalarsPerLdgB
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB
     
    static int const kScalarsPerLdsB
     
    static int const kScalarsPerLdgC
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD
     
    static int const kScalarsPerLdsD
     
    static int const kAccumulatorsPerLdsA
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB
     
    static int const kStages
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.png new file mode 100644 index 00000000..13377f84 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4-members.html new file mode 100644 index 00000000..0d364dc9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    InstructionShape typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kStagescutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kThreadscutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kWarpSizecutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    OutputTile typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    ScalarA typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    ScalarB typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    ScalarC typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    ScalarD typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    Warps typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.html new file mode 100644 index 00000000..948f565c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.html @@ -0,0 +1,177 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >:
    +
    +
    + + +cutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    typedef int8_t ScalarA
     The scalar for A. More...
     
    typedef int8_t ScalarB
     The scalar for B. More...
     
    typedef int8_t ScalarC
     The scalar for C. More...
     
    typedef int8_t ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int > MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    static int const kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads
     The number of threads. More...
     
    static int const kScalarsPerLdgA
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA
     
    static int const kScalarsPerLdsA
     
    static int const kScalarsPerLdgB
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB
     
    static int const kScalarsPerLdsB
     
    static int const kScalarsPerLdgC
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD
     
    static int const kScalarsPerLdsD
     
    static int const kAccumulatorsPerLdsA
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB
     
    static int const kStages
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.png new file mode 100644 index 00000000..584bcfd7 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits-members.html new file mode 100644 index 00000000..f3edab99 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + + +
    Base typedefcutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Delta typedefcutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Iterations typedefcutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    kAccessSizecutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kLayoutcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kMemorySpacecutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kOperandcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    MultiplicandTraits typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedefcutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ThreadsDelta typedefcutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Tile typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html new file mode 100644 index 00000000..3aaf681c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html @@ -0,0 +1,254 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Base
     The base class. More...
     
    typedef Base::Threads Threads
     The threads. More...
     
    typedef Shape< Base::Threads::kH *4, 1, Base::Threads::kW, Base::kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< Base::Tile::kH/Base::Threads::kH/4, 4, Base::Tile::kW/Base::Threads::kW, Base::Tile::kC/Base::kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< 1, 4, Base::Tile::kC > ThreadsDelta
     The threads strides. More...
     
    - Public Types inherited from cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile shape. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads shape. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsDelta
     The relative offset between two elements in the H/W dimension in adjacent threads. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSize > ImmediateOffsetStrides
     Strides for immediate offset computation. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
     
    + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Static Public Attributes inherited from cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    static GemmOperand::Kind const kOperand = kOperand_
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout = kLayout_
     The layout. More...
     
    static int const kAccessSize = kAccessSize_
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kGlobal
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef GemmGlobalTileTraits<kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_> cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<Base::Threads::kH * 4, 1, Base::Threads::kW, Base::kAccessSize> cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<Base::Tile::kH / Base::Threads::kH / 4, 4, Base::Tile::kW / Base::Threads::kW, Base::Tile::kC / Base::kAccessSize> cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Iterations
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Base::Threads cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsDelta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<1, 4, Base::Tile::kC> cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadsDelta
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.png new file mode 100644 index 00000000..4a33ed8b Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..6159417c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..580eee07 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <igemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue-members.html new file mode 100644 index 00000000..73d16eae --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue-members.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Base typedefcutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >
    epilogue(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    epilogue_with_or_without_beta(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    Functor typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GemmEpilogue(Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >inline
    Index typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Iterations typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    mcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ncutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    OutputTile typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Params typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    paramscutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Scalar typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_load_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    shared_storagecutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_store_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedLoadTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStorage typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Traits typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.html new file mode 100644 index 00000000..fdd93ee4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.html @@ -0,0 +1,275 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >:
    +
    +
    + + +cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmEpilogue< GemmEpilogueTraits_ > Base
     The base class. More...
     
    - Public Types inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    typedef GemmEpilogueTraits_ Traits
     The traits class. More...
     
    typedef Traits::Params Params
     The params. More...
     
    typedef Traits::SharedStorage SharedStorage
     The shared storage. More...
     
    typedef Traits::OutputTile OutputTile
     The output tile. More...
     
    typedef Traits::Iterations Iterations
     The number of iterations. More...
     
    typedef Traits::Accumulators Accumulators
     The accumulators. More...
     
    typedef Traits::Scalar Scalar
     The scalar. More...
     
    typedef Traits::Functor Functor
     The functor in charge of the math. More...
     
    typedef Traits::GlobalLoadIteratorC GlobalLoadIteratorC
     We do not support 3D or 4D shapes. More...
     
    typedef Traits::GlobalTransformerC GlobalTransformerC
     The transformer for C. More...
     
    typedef Traits::GlobalTransformerD GlobalTransformerD
     The transformer for D. More...
     
    typedef Traits::GlobalStoreIteratorD GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef Traits::SharedStoreIteratorD SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef Traits::SharedStoreTransformerD SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef Traits::SharedLoadIteratorD SharedLoadIteratorD
     The iterator to load D in shared memory. More...
     
    typedef Copy< typename SharedLoadIteratorD::Fragment > SharedLoadTransformerD
     The shared load transformer for D. More...
     
    typedef Traits::Index Index
     The index. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IgemmEpilogue (typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)
     Ctor. More...
     
    - Public Member Functions inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    CUTLASS_DEVICE GemmEpilogue (Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)
     Ctor. More...
     
    CUTLASS_DEVICE void epilogue (Coord< 3 > const &block, Accumulators &accumulators)
     Execute the epilogue. More...
     
    template<bool kBetaIsZero_>
    CUTLASS_DEVICE void epilogue_with_or_without_beta (Coord< 3 > const &block, Accumulators &accumulators)
     
    CUTLASS_DEVICE void shared_load_fence ()
     The memory fence for shared loads. More...
     
    CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Attributes inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Params const & params
     The params. More...
     
    SharedStorage & shared_storage
     The shared storage. More...
     
    Index m
     The dimensions of the GEMM. More...
     
    Index n
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ , bool = GemmEpilogueTraits_::kInt8Output>
    + + + + +
    typedef GemmEpilogue<GemmEpilogueTraits_> cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >::Base
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ IgemmEpilogue()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ , bool = GemmEpilogueTraits_::kInt8Output>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >::IgemmEpilogue (typename Base::Params const & params_,
    typename Base::SharedStorage & shared_storage_,
    typename Base::Index m_,
    typename Base::Index n_ 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.png new file mode 100644 index 00000000..f10d072e Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar-members.html new file mode 100644 index 00000000..9d49d21c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogueScalar< ScalarD_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogueScalar< ScalarD_ >, including all inherited members.

    + + +
    Scalar typedefcutlass::gemm::IgemmEpilogueScalar< ScalarD_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html new file mode 100644 index 00000000..9ae00dbd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogueScalar< ScalarD_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogueScalar< ScalarD_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef float Scalar
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename ScalarD_ >
    + + + + +
    typedef float cutlass::gemm::IgemmEpilogueScalar< ScalarD_ >::Scalar
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4-members.html new file mode 100644 index 00000000..b7179e43 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogueScalar< int > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogueScalar< int >, including all inherited members.

    + + +
    Scalar typedefcutlass::gemm::IgemmEpilogueScalar< int >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html new file mode 100644 index 00000000..ab260d70 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogueScalar< int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogueScalar< int > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef int Scalar
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    + + + + +
    typedef int cutlass::gemm::IgemmEpilogueScalar< int >::Scalar
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits-members.html new file mode 100644 index 00000000..fc77b4b7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits-members.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Delta typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Functor typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Index typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Iterations typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    kInt8Outputcutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ >static
    OutputTile typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Scalar typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    ScalarC typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    ScalarD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html new file mode 100644 index 00000000..16b5df04 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html @@ -0,0 +1,187 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ >:
    +
    +
    + + +cutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ > + +
    + + + + + +

    +Static Public Attributes

    static bool const kInt8Output
     Do we output in int8? More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    typedef IgemmConfig_::OutputTile OutputTile
     The output tile. More...
     
    typedef IgemmConfig_::Accumulators Accumulators
     
    typedef Helper_::GlobalLoadIteratorC GlobalLoadIteratorC
     The iterator for C in global memory. More...
     
    typedef Helper_::GlobalTransformerC GlobalTransformerC
     The transformer for C. More...
     
    typedef Helper_::GlobalTransformerD GlobalTransformerD
     The transformer for D. More...
     
    typedef Helper_::GlobalStoreIteratorD GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef Helper_::SharedStoreIteratorD SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef Helper_::SharedStoreTransformerD SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef Helper_::SharedLoadIteratorD SharedLoadIteratorD
     The iterator to load D from shared memory. More...
     
    typedef Helper_::Iterations Iterations
     The number of iterations. More...
     
    typedef Helper_::Delta Delta
     The iterations strides. More...
     
    typedef EpilogueFunctor_ Functor
     The functor in charge of the math. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef Functor::Scalar Scalar
     The scalar. Note: 3D or 4D shapes are not supported. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    +

    Member Data Documentation

    + +

    ◆ kInt8Output

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ = int, typename Helper_ = IgemmEpilogueTraitsHelper<IgemmConfig_, EpilogueFunctor_, Index_>>
    + + + + + +
    + + + + +
    bool const cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ >::kInt8Output
    +
    +static
    +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.png new file mode 100644 index 00000000..ed357320 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper-members.html new file mode 100644 index 00000000..66ddf397 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper-members.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    Base typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    Delta typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    Functor typedefcutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalFragmentC typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalFragmentD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalLoadIteratorC typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalLoadTileTraits typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalStoreIteratorD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalStoreTileTraits typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalTransformerC typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalTransformerD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    IgemmConfig typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    Iterations typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    OutputTile typedefcutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    Scalar typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedLoadIteratorD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedLoadTileTraits typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreFragmentD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreIteratorD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreTileTraits typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreTransformerD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html new file mode 100644 index 00000000..555b0ce9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html @@ -0,0 +1,518 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >:
    +
    +
    + + +cutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > Base
     The base class. More...
     
    typedef IgemmConfig_ IgemmConfig
     The config. More...
     
    typedef Base::Scalar Scalar
     The scalar type of the epilogue. More...
     
    typedef Base::Iterations Iterations
     The iterations. More...
     
    typedef Base::Delta Delta
     The iterations strides. More...
     
    typedef Base::GlobalLoadTileTraits GlobalLoadTileTraits
     The traits class for the iterator. More...
     
    typedef GemmGlobalIteratorCd< GlobalLoadTileTraits > GlobalLoadIteratorC
     The iterator to load C from global memory. More...
     
    typedef GlobalLoadIteratorC::Fragment GlobalFragmentC
     The fragment that needs to be produced by the load iterator. More...
     
    typedef IgemmGlobalLoadTransformer< GlobalFragmentC, Scalar >::Transformer GlobalTransformerC
     The transformer from loaded data to math fragment. More...
     
    typedef Base::GlobalStoreTileTraits GlobalStoreTileTraits
     The traits class for the iterator. More...
     
    typedef GemmGlobalIteratorCd< GlobalStoreTileTraits > GlobalStoreIteratorD
     The iterator to store D to global memory. More...
     
    typedef GlobalStoreIteratorD::Fragment GlobalFragmentD
     The fragment that needs to be passed to that store iterator. More...
     
    typedef IgemmGlobalStoreTransformer< Scalar, GlobalFragmentD >::Transformer GlobalTransformerD
     The transformer from accumulators to shared memory fragments. More...
     
    typedef Base::SharedStoreTileTraits SharedStoreTileTraits
     The traits class for the shared iterator to store D to shared memory. More...
     
    typedef TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal > SharedStoreIteratorD
     The shared iterator to store D to shared memory. More...
     
    typedef SharedStoreIteratorD::Fragment SharedStoreFragmentD
     The fragment that needs to be passed to that store iterator. More...
     
    typedef IgemmSharedStoreTransformer< typename IgemmConfig::Accumulators::Element, SharedStoreFragmentD >::Transformer SharedStoreTransformerD
     The transformer from accumulators to shared memory fragments. More...
     
    typedef Base::SharedLoadTileTraits SharedLoadTileTraits
     The traits class for the shared iterator to load D from shared memory. More...
     
    typedef TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorD
     The shared iterator to load D from shared memory. More...
     
    - Public Types inherited from cutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    typedef EpilogueFunctor_::Scalar Scalar
     The scalar. More...
     
    typedef IgemmConfig_ ::OutputTile OutputTile
     The output tile. More...
     
    typedef Shape< 1, IgemmConfig_ ::MultiplyAdd::AccumulatorsPerThread::kH/IgemmConfig_ ::kAccumulatorsPerLdsB, IgemmConfig_ ::kAccumulatorsPerLdsB > Iterations
     The number of iterations in the epilogue. More...
     
    typedef Shape< 0, IgemmConfig_ ::kAccumulatorsPerLdsB *(IgemmConfig_ ::Warps::kH *IgemmConfig_ ::MultiplyAdd::ThreadsPerWarp::kH - 1), 0 > Delta
     
    typedef EpilogueFunctor_ Functor
     The functor to do the math in the epilogue. More...
     
    typedef GemmSharedStoreTileDTraits< typename Functor::Scalar, typename IgemmConfig_ ::OutputTile, typename IgemmConfig_ ::Warps, typename IgemmConfig_ ::MultiplyAdd::ThreadsPerWarp, IgemmConfig_ ::kScalarsPerStsD, 128/sizeof(typename IgemmConfig_ ::ScalarD)/IgemmConfig_ ::kScalarsPerStsD/2 *IgemmConfig_ ::kScalarsPerStsD > SharedStoreTileTraits
     The traits class to build the iterator to store to shared memory for D. More...
     
    typedef TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorD
     The iterator to store D to shared memory. More...
     
    typedef Copy< typename SharedStoreIteratorD::Fragment > SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef GemmSharedLoadTileDTraits< typename Functor::Scalar, typename IgemmConfig_ ::OutputTile, typename IgemmConfig_ ::Warps, typename IgemmConfig_ ::MultiplyAdd::ThreadsPerWarp, IgemmConfig_ ::OutputTile::kH/ShapeCount< Iterations >::kCount, IgemmConfig_ ::kScalarsPerLdsD, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for D. More...
     
    typedef TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorD
     The iterator to load D from shared memory. More...
     
    typedef GemmGlobalTileCdTraits< typename IgemmConfig_ ::ScalarC const, Shape< 1, IgemmConfig_ ::OutputTile::kH/ShapeCount< Iterations >::kCount, IgemmConfig_ ::OutputTile::kW >, Shape< 1, ShapeCount< typename IgemmConfig_ ::Warps >::kCount, IgemmConfig_ ::kWarpSize >, Iterations::kW, IgemmConfig_ ::kScalarsPerLdgC > GlobalLoadTileTraits
     The traits class to build the iterator to load data from global memory for C^N. More...
     
    typedef GemmGlobalIteratorCd< GlobalLoadTileTraits, Index_ > GlobalLoadIteratorC
     The iterator to load C. More...
     
    typedef Copy< typename GlobalLoadIteratorC::Fragment > GlobalTransformerC
     The transformer for C. More...
     
    typedef GemmGlobalTileCdTraits< typename IgemmConfig_ ::ScalarD, Shape< 1, IgemmConfig_ ::OutputTile::kH/ShapeCount< Iterations >::kCount, IgemmConfig_ ::OutputTile::kW >, Shape< 1, ShapeCount< typename IgemmConfig_ ::Warps >::kCount, IgemmConfig_ ::kWarpSize >, Iterations::kW, IgemmConfig_ ::kScalarsPerStgD > GlobalStoreTileTraits
     The traits class to build the iterator to store data to global memory for D^N. More...
     
    typedef GemmGlobalIteratorCd< GlobalStoreTileTraits, Index_ > GlobalStoreIteratorD
     The iterator to store D. More...
     
    typedef Copy< typename GlobalStoreIteratorD::Fragment > GlobalTransformerD
     The transformer for D. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef GemmEpilogueTraitsHelper<IgemmConfig_, EpilogueFunctor_, Index_> cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::Delta cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::Delta
    +
    + +
    +
    + +

    ◆ GlobalFragmentC

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef GlobalLoadIteratorC::Fragment cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalFragmentC
    +
    + +
    +
    + +

    ◆ GlobalFragmentD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef GlobalStoreIteratorD::Fragment cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalFragmentD
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorC

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef GemmGlobalIteratorCd<GlobalLoadTileTraits> cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadIteratorC
    +
    + +
    +
    + +

    ◆ GlobalLoadTileTraits

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::GlobalLoadTileTraits cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadTileTraits
    +
    + +
    +
    + +

    ◆ GlobalStoreIteratorD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef GemmGlobalIteratorCd<GlobalStoreTileTraits> cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreIteratorD
    +
    + +
    +
    + +

    ◆ GlobalStoreTileTraits

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::GlobalStoreTileTraits cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreTileTraits
    +
    + +
    +
    + +

    ◆ GlobalTransformerC

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef IgemmGlobalLoadTransformer<GlobalFragmentC, Scalar>::Transformer cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerC
    +
    + +
    +
    + +

    ◆ GlobalTransformerD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef IgemmGlobalStoreTransformer<Scalar, GlobalFragmentD>::Transformer cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerD
    +
    + +
    +
    + +

    ◆ IgemmConfig

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef IgemmConfig_ cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::IgemmConfig
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::Iterations cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::Iterations
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::Scalar cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef TileLoadIterator<SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadIteratorD
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::SharedLoadTileTraits cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreFragmentD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef SharedStoreIteratorD::Fragment cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreFragmentD
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef TileStoreIterator<SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal> cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreIteratorD
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::SharedStoreTileTraits cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTransformerD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef IgemmSharedStoreTransformer<typename IgemmConfig::Accumulators::Element, SharedStoreFragmentD>::Transformer cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTransformerD
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.png new file mode 100644 index 00000000..5a6a3698 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4-members.html new file mode 100644 index 00000000..7b975671 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4-members.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Base typedefcutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >
    epilogue(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    epilogue_with_or_without_beta(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    Functor typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GemmEpilogue(Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >inline
    Index typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Iterations typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    mcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ncutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    OutputTile typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Params typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    paramscutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Scalar typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_load_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    shared_storagecutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_store_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedLoadTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStorage typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Traits typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html new file mode 100644 index 00000000..37e7daa8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html @@ -0,0 +1,275 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >:
    +
    +
    + + +cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmEpilogue< GemmEpilogueTraits_ > Base
     The base class. More...
     
    - Public Types inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    typedef GemmEpilogueTraits_ Traits
     The traits class. More...
     
    typedef Traits::Params Params
     The params. More...
     
    typedef Traits::SharedStorage SharedStorage
     The shared storage. More...
     
    typedef Traits::OutputTile OutputTile
     The output tile. More...
     
    typedef Traits::Iterations Iterations
     The number of iterations. More...
     
    typedef Traits::Accumulators Accumulators
     The accumulators. More...
     
    typedef Traits::Scalar Scalar
     The scalar. More...
     
    typedef Traits::Functor Functor
     The functor in charge of the math. More...
     
    typedef Traits::GlobalLoadIteratorC GlobalLoadIteratorC
     The iterator for C in global memory. Note: 3D or 4D shapes are not supported. More...
     
    typedef Traits::GlobalTransformerC GlobalTransformerC
     The transformer for C. More...
     
    typedef Traits::GlobalTransformerD GlobalTransformerD
     The transformer for D. More...
     
    typedef Traits::GlobalStoreIteratorD GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef Traits::SharedStoreIteratorD SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef Traits::SharedStoreTransformerD SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef Traits::SharedLoadIteratorD SharedLoadIteratorD
     The iterator to load D in shared memory. More...
     
    typedef Copy< typename SharedLoadIteratorD::Fragment > SharedLoadTransformerD
     The shared load transformer for D. More...
     
    typedef Traits::Index Index
     The index. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IgemmEpilogue (typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)
     Ctor. More...
     
    - Public Member Functions inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    CUTLASS_DEVICE GemmEpilogue (Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)
     Ctor. More...
     
    CUTLASS_DEVICE void epilogue (Coord< 3 > const &block, Accumulators &accumulators)
     Execute the epilogue. More...
     
    template<bool kBetaIsZero_>
    CUTLASS_DEVICE void epilogue_with_or_without_beta (Coord< 3 > const &block, Accumulators &accumulators)
     
    CUTLASS_DEVICE void shared_load_fence ()
     The memory fence for shared loads. More...
     
    CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Attributes inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Params const & params
     The params. More...
     
    SharedStorage & shared_storage
     The shared storage. More...
     
    Index m
     The dimensions of the GEMM. More...
     
    Index n
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef GemmEpilogue<GemmEpilogueTraits_> cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >::Base
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ IgemmEpilogue()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >::IgemmEpilogue (typename Base::Params const & params_,
    typename Base::SharedStorage & shared_storage_,
    typename Base::Index m_,
    typename Base::Index n_ 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.png new file mode 100644 index 00000000..5b7af1e9 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter-members.html new file mode 100644 index 00000000..95e068d1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmFloatToInt8Converter< kElements_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html new file mode 100644 index 00000000..35a75abe --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html @@ -0,0 +1,265 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmFloatToInt8Converter< kElements_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmFloatToInt8Converter< kElements_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + + + + + +

    +Public Types

    typedef Fragment< float, kElements_ > InputFragment
     The input fragment. More...
     
    typedef Fragment< int8_t, kElements_ > OutputFragment
     The output fragment. More...
     
    + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IgemmFloatToInt8Converter ()
     Ctor. More...
     
    CUTLASS_DEVICE void transform (InputFragment const &src, OutputFragment &dst)
     Transform a fragment. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void transform (Fragment_ const &src, int offset, OutputFragment &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InputFragment

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef Fragment<float, kElements_> cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef Fragment<int8_t, kElements_> cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ IgemmFloatToInt8Converter()

    + +
    +
    +
    +template<int kElements_>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >::IgemmFloatToInt8Converter ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform() [1/2]

    + +
    +
    +
    +template<int kElements_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >::transform (InputFragment const & src,
    OutputFragment & dst 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ transform() [2/2]

    + +
    +
    +
    +template<int kElements_>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >::transform (Fragment_ const & src,
    int offset,
    OutputFragment & dst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer-members.html new file mode 100644 index 00000000..0dc73a82 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html new file mode 100644 index 00000000..43ba1df7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + +

    +Public Types

    typedef Convert< InputFragment_, Fragment< OutputScalar_, InputFragment_::kElements > > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename InputFragment_, typename OutputScalar_>
    + + + + +
    typedef Convert<InputFragment_, Fragment<OutputScalar_, InputFragment_::kElements> > cutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4-members.html new file mode 100644 index 00000000..1e2db952 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html new file mode 100644 index 00000000..18168712 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + +

    +Public Types

    typedef IgemmInt8ToFloatConverter< kElements_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef IgemmInt8ToFloatConverter<kElements_> cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer-members.html new file mode 100644 index 00000000..7d045266 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html new file mode 100644 index 00000000..134180f5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + +

    +Public Types

    typedef Convert< Fragment< InputScalar_, OutputFragment_::kElements >, OutputFragment_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename InputScalar_, typename OutputFragment_>
    + + + + +
    typedef Convert<Fragment<InputScalar_, OutputFragment_::kElements>, OutputFragment_> cutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4-members.html new file mode 100644 index 00000000..4a235542 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html new file mode 100644 index 00000000..4d6a68c1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + +

    +Public Types

    typedef IgemmFloatToInt8Converter< kElements_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef IgemmFloatToInt8Converter<kElements_> cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter-members.html new file mode 100644 index 00000000..0e462c73 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html new file mode 100644 index 00000000..00a382f7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html @@ -0,0 +1,265 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + + + + + +

    +Public Types

    typedef Fragment< int8_t, kElements_ > InputFragment
     The input fragment. More...
     
    typedef Fragment< float, kElements_ > OutputFragment
     The output fragment. More...
     
    + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IgemmInt8ToFloatConverter ()
     Ctor. More...
     
    CUTLASS_DEVICE void transform (InputFragment const &src, OutputFragment &dst)
     Transform a fragment. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void transform (Fragment_ const &src, int offset, OutputFragment &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InputFragment

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef Fragment<int8_t, kElements_> cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef Fragment<float, kElements_> cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ IgemmInt8ToFloatConverter()

    + +
    +
    +
    +template<int kElements_>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >::IgemmInt8ToFloatConverter ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform() [1/2]

    + +
    +
    +
    +template<int kElements_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >::transform (InputFragment const & src,
    OutputFragment & dst 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ transform() [2/2]

    + +
    +
    +
    +template<int kElements_>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >::transform (Fragment_ const & src,
    int offset,
    OutputFragment & dst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer-members.html new file mode 100644 index 00000000..dcdcd512 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html new file mode 100644 index 00000000..089013d3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + +

    +Public Types

    typedef Convert< Fragment< InputScalar_, OutputFragment_::kElements >, OutputFragment_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename InputScalar_, typename OutputFragment_>
    + + + + +
    typedef Convert<Fragment<InputScalar_, OutputFragment_::kElements>, OutputFragment_> cutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle-members.html new file mode 100644 index 00000000..801a9530 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmSwizzle< GlobalIterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle.html new file mode 100644 index 00000000..6f2c5963 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle.html @@ -0,0 +1,273 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmSwizzle< GlobalIterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmSwizzle< GlobalIterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_swizzle.h>

    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GlobalIterator_ GlobalIterator
     The global iterator. More...
     
    typedef GlobalIterator::Fragment Fragment
     The source fragment. More...
     
    typedef GlobalIterator::FragmentShape FragmentShape
     The shape of the source fragment. More...
     
    typedef Fragment InputFragment
     The source fragment. More...
     
    typedef Fragment OutputFragment
     The destination fragment. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IgemmSwizzle ()
     The src/dst must be int8 fragments. More...
     
    CUTLASS_DEVICE void transform (Fragment const &src, Fragment &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator::Fragment cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator::FragmentShape cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ GlobalIterator

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator_ cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::GlobalIterator
    +
    + +
    +
    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef Fragment cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef Fragment cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ IgemmSwizzle()

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::IgemmSwizzle ()
    +
    +inline
    +
    +

    The number of elements must be a multiple of 4. Ctor.

    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform()

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::transform (Fragment const & src,
    Fragment & dst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.html new file mode 100644 index 00000000..e26b4c61 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTileTraitsHelperA< kLayout_, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTileTraitsHelperA< kLayout_, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmTileTraitsHelperA< kLayout_, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperA< kLayout_, GemmConfig_ > + +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.png new file mode 100644 index 00000000..d055e029 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 00000000..6a93f43b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html new file mode 100644 index 00000000..27a96f2a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,218 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Base
     The base config. More...
     
    typedef IgemmContiguousGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^N. More...
     
    typedef GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kW *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsA > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^N. More...
     
    - Public Types inherited from cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
    typedef GemmConfig_::ScalarA Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^N. More...
     
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^N. More...
     
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, 0 > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for A^N. More...
     
    + + + + + + + + +

    +Static Public Attributes

    static int const kScalarsPerStsA = 16
     The number of scalars per LDG/STS/LDS for A. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
    static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor
     The layout. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Base
    +
    + +
    +
    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef IgemmContiguousGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, int8_t const, Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, 4> cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreTileAbTraits< int8_t, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kW * 4>, typename GlobalTileTraits::Threads, kScalarsPerStsA> cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kScalarsPerStsA

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    int const cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsPerStsA = 16
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png new file mode 100644 index 00000000..e13efd74 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.html new file mode 100644 index 00000000..56b03b2b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTileTraitsHelperB< kLayout_, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTileTraitsHelperB< kLayout_, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmTileTraitsHelperB< kLayout_, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperB< kLayout_, GemmConfig_ > + +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.png new file mode 100644 index 00000000..27dbc2cd Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 00000000..c4c05c58 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html new file mode 100644 index 00000000..de98c371 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,218 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Base
     The base config. More...
     
    typedef IgemmContiguousGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^T. More...
     
    typedef GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kH *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsB > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^T. More...
     
    - Public Types inherited from cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
    typedef GemmConfig_::ScalarB Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^T. More...
     
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^T. More...
     
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, 0 > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for B^T. More...
     
    + + + + + + + + +

    +Static Public Attributes

    static int const kScalarsPerStsB = 16
     The number of scalars per LDG/STS/LDS for B. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
    static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor
     The layout. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Base
    +
    + +
    +
    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef IgemmContiguousGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, int8_t const, Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, 4> cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreTileAbTraits< int8_t, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kH * 4>, typename GlobalTileTraits::Threads, kScalarsPerStsB> cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kScalarsPerStsB

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    int const cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsPerStsB = 16
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png new file mode 100644 index 00000000..0703b778 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits-members.html new file mode 100644 index 00000000..23f0fe6d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits-members.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    Epilogue typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GemmConfig typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    Index typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    kLayoutAcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >static
    kLayoutBcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    OutputTile typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarC typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarD typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    shared_load_fence(bool in_loop)cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >inlinestatic
    shared_store_fence(bool in_loop)cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >inlinestatic
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.html new file mode 100644 index 00000000..92ff0ed8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.html @@ -0,0 +1,172 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ >:
    +
    +
    + + +cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    typedef Helper_::GemmConfig GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef Helper_::GlobalLoadStreamA GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamA ::Scalar ScalarA
     The scalar for A. More...
     
    typedef Helper_::GlobalLoadStreamB GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamB ::Scalar ScalarB
     The scalar for B. More...
     
    typedef Helper_::SharedLoadStreamA SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef Helper_::SharedLoadStreamB SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef Helper_::Epilogue Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef IdentityBlockSwizzle BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef Helper_::ClearAccumulators ClearAccumulators
     Clear the accumulators. More...
     
    - Static Public Member Functions inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    static MatrixLayout::Kind const kLayoutA
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB
     The layout of B. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.png new file mode 100644 index 00000000..7f98448c Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper-members.html new file mode 100644 index 00000000..9c138df7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper-members.html @@ -0,0 +1,108 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + +
    ClearAccumulators typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    Epilogue typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GemmConfig typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GemmTileTraitsHelperA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GemmTileTraitsHelperB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalLoadIteratorA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalLoadIteratorB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalLoadStreamA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalLoadStreamB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalTransformerA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalTransformerB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    MultiplyAdd typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedLoadIteratorA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedLoadIteratorB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedLoadStreamA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedLoadStreamB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedStoreIteratorA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedStoreIteratorB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper.html new file mode 100644 index 00000000..3ac649b1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper.html @@ -0,0 +1,441 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > GemmConfig
     The IGEMM config. More...
     
    typedef IgemmTileTraitsHelperA< kLayoutA_, GemmConfig > GemmTileTraitsHelperA
     The GEMM config for A. More...
     
    typedef IgemmTileTraitsHelperB< kLayoutB_, GemmConfig > GemmTileTraitsHelperB
     The GEMM config for B. More...
     
    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
     The iterator to load A from global memory. More...
     
    typedef IgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
     The default transformer for A. More...
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
     The iterator to store A to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
     The iterator to load B from global memory. More...
     
    typedef IgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
     The iterator to store B to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
     The iterator to load A from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorA, Copy< typename SharedLoadIteratorA::Fragment > > SharedLoadStreamA
     The stream to load A from shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
     The iterator to load B from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorB, Copy< typename SharedLoadIteratorB::Fragment > > SharedLoadStreamB
     The stream to load B from shared memory. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef ClearAccumulators< typename MultiplyAdd::ScalarC > ClearAccumulators
     The object to clear accumulators. More...
     
    typedef IgemmEpilogue< IgemmEpilogueTraits< GemmConfig, EpilogueFunctor_ > > Epilogue
     The epilogue. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ ClearAccumulators

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef ClearAccumulators<typename MultiplyAdd::ScalarC> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::ClearAccumulators
    +
    + +
    +
    + +

    ◆ Epilogue

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmEpilogue<IgemmEpilogueTraits<GemmConfig, EpilogueFunctor_> > cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::Epilogue
    +
    + +
    +
    + +

    ◆ GemmConfig

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmConfig<OutputTile_, ScalarD_, AccumulatorsPerThread_> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GemmConfig
    +
    + +
    +
    + +

    ◆ GemmTileTraitsHelperA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmTileTraitsHelperA<kLayoutA_, GemmConfig> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GemmTileTraitsHelperA
    +
    + +
    +
    + +

    ◆ GemmTileTraitsHelperB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmTileTraitsHelperB<kLayoutB_, GemmConfig> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GemmTileTraitsHelperB
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperA::GlobalTileTraits, Index_> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalLoadIteratorA
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperB::GlobalTileTraits, Index_> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalLoadIteratorB
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalLoadStreamA
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalLoadStreamB
    +
    + +
    +
    + +

    ◆ GlobalTransformerA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmTransformerA<GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA>::Transformer cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalTransformerA
    +
    + +
    +
    + +

    ◆ GlobalTransformerB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmTransformerB<GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB>::Transformer cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalTransformerB
    +
    + +
    +
    + +

    ◆ MultiplyAdd

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef GemmConfig::MultiplyAdd cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::MultiplyAdd
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedLoadIteratorA
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedLoadIteratorB
    +
    + +
    +
    + +

    ◆ SharedLoadStreamA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorA, Copy<typename SharedLoadIteratorA::Fragment> > cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedLoadStreamA
    +
    + +
    +
    + +

    ◆ SharedLoadStreamB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorB, Copy<typename SharedLoadIteratorB::Fragment> > cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedLoadStreamB
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedStoreIteratorA
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedStoreIteratorB
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA.html new file mode 100644 index 00000000..73e523bb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerA< kLayout_, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerA< kLayout_, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html new file mode 100644 index 00000000..b31cf3bb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html new file mode 100644 index 00000000..7135e2be --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef IgemmSwizzle< Iterator_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef IgemmSwizzle<Iterator_> cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html new file mode 100644 index 00000000..e8d627ef --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html new file mode 100644 index 00000000..3c826b8a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef Copy< typename Iterator_::Fragment > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef Copy<typename Iterator_::Fragment> cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB.html new file mode 100644 index 00000000..fa606b24 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerB< kLayout_, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerB< kLayout_, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html new file mode 100644 index 00000000..ed73de85 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html new file mode 100644 index 00000000..61c70cf9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef Copy< typename Iterator_::Fragment > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef Copy<typename Iterator_::Fragment> cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html new file mode 100644 index 00000000..709672d2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html new file mode 100644 index 00000000..836dae8a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef IgemmSwizzle< Iterator_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef IgemmSwizzle<Iterator_> cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling-members.html new file mode 100644 index 00000000..16552547 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling.html b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling.html new file mode 100644 index 00000000..d79eb6f6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling.html @@ -0,0 +1,319 @@ + + + + + + + +Cutlass: cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ > Struct Template Reference
    +
    +
    + +

    Functor to compute a linear combination of fragments. +

    + +

    #include <linear_scaling.h>

    + + + + + +

    +Classes

    struct  Params
     The parameters. More...
     
    + + + + + +

    +Public Types

    typedef Scalar_ Scalar
     
    typedef FragmentMultiplyAdd_ FragmentMultiplyAdd
     
    + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE LinearScaling (Params const &params)
     Ctor. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void evaluate (Fragment_ const &accum, Fragment_ &output)
     Evaluate the functor. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void evaluate (Fragment_ const &accum, Fragment_ const &old, Fragment_ &output)
     Evaluate the functor. More...
     
    + + + + + + +

    +Public Attributes

    Scalar alpha
     The alpha/beta scaling factors. More...
     
    Scalar beta
     
    +

    Member Typedef Documentation

    + +

    ◆ FragmentMultiplyAdd

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    typedef FragmentMultiplyAdd_ cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::FragmentMultiplyAdd
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    typedef Scalar_ cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Scalar
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ LinearScaling()

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::LinearScaling (Params const & params)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ evaluate() [1/2]

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::evaluate (Fragment_ const & accum,
    Fragment_ & output 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ evaluate() [2/2]

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::evaluate (Fragment_ const & accum,
    Fragment_ const & old,
    Fragment_ & output 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ alpha

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    Scalar cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::alpha
    +
    + +
    +
    + +

    ◆ beta

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    Scalar cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::beta
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params-members.html new file mode 100644 index 00000000..4342065c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html new file mode 100644 index 00000000..605e0b8f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html @@ -0,0 +1,179 @@ + + + + + + + +Cutlass: cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params Struct Reference
    +
    +
    + +

    The parameters. +

    + +

    #include <linear_scaling.h>

    + + + + + + +

    +Public Member Functions

    template<typename GemmDesc_ >
    CUTLASS_HOST_DEVICE int initialize (GemmDesc_ const &desc)
     Initialize the parameters. More...
     
    + + + + + + +

    +Public Attributes

    Scalar alpha
     The alpha/beta scaling params. More...
     
    Scalar beta
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    +
    +template<typename GemmDesc_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params::initialize (GemmDesc_ const & desc)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ alpha

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    Scalar cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params::alpha
    +
    + +
    +
    + +

    ◆ beta

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    Scalar cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params::beta
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand.html new file mode 100644 index 00000000..6b5a3958 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: cutlass::gemm::ProjectOperand< operand, Kstrided > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ProjectOperand< operand, Kstrided > Struct Template Reference
    +
    +
    + +

    #include <gemm_operand.h>

    +

    Detailed Description

    +

    template<GemmOperand::Kind operand, bool Kstrided = true>
    +struct cutlass::gemm::ProjectOperand< operand, Kstrided >

    + +

    Projects a coordinate (K, N, M) onto the inner and outer dimensions defined for each operand.

    +

    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4-members.html new file mode 100644 index 00000000..cbf6186d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >, including all inherited members.

    + + +
    project(Coord< 3 > const &coord)cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html new file mode 100644 index 00000000..50d95335 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided > Struct Template Reference
    +
    +
    + +

    Project A operand - (0, K, M) +

    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_HOST_DEVICE Coord< 3 > project (Coord< 3 > const &coord)
     
    +

    Member Function Documentation

    + +

    ◆ project()

    + +
    +
    +
    +template<bool Kstrided>
    + + + + + +
    + + + + + + + + +
    static CUTLASS_HOST_DEVICE Coord<3> cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >::project (Coord< 3 > const & coord)
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4-members.html new file mode 100644 index 00000000..ab297d48 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >, including all inherited members.

    + + +
    project(Coord< 3 > const &coord)cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html new file mode 100644 index 00000000..7021dbf8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided > Struct Template Reference
    +
    +
    + +

    Project B operand - (0, K, N) +

    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_HOST_DEVICE Coord< 3 > project (Coord< 3 > const &coord)
     
    +

    Member Function Documentation

    + +

    ◆ project()

    + +
    +
    +
    +template<bool Kstrided>
    + + + + + +
    + + + + + + + + +
    static CUTLASS_HOST_DEVICE Coord<3> cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >::project (Coord< 3 > const & coord)
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4-members.html new file mode 100644 index 00000000..79bf952f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kC, true > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ProjectOperand< GemmOperand::kC, true >, including all inherited members.

    + + +
    project(Coord< 3 > const &coord)cutlass::gemm::ProjectOperand< GemmOperand::kC, true >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html new file mode 100644 index 00000000..8e2882f9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html @@ -0,0 +1,131 @@ + + + + + + + +Cutlass: cutlass::gemm::ProjectOperand< GemmOperand::kC, true > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kC, true > Struct Template Reference
    +
    +
    + +

    Project C operand - (0, N, M) +

    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_HOST_DEVICE Coord< 3 > project (Coord< 3 > const &coord)
     
    +

    Member Function Documentation

    + +

    ◆ project()

    + +
    +
    + + + + + +
    + + + + + + + + +
    static CUTLASS_HOST_DEVICE Coord<3> cutlass::gemm::ProjectOperand< GemmOperand::kC, true >::project (Coord< 3 > const & coord)
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4-members.html new file mode 100644 index 00000000..d20c5c9f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kD, true > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ProjectOperand< GemmOperand::kD, true >, including all inherited members.

    + + +
    project(Coord< 3 > const &coord)cutlass::gemm::ProjectOperand< GemmOperand::kD, true >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html new file mode 100644 index 00000000..5cf621e3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html @@ -0,0 +1,131 @@ + + + + + + + +Cutlass: cutlass::gemm::ProjectOperand< GemmOperand::kD, true > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kD, true > Struct Template Reference
    +
    +
    + +

    Project D operand - (0, N, M) +

    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_HOST_DEVICE Coord< 3 > project (Coord< 3 > const &coord)
     
    +

    Member Function Documentation

    + +

    ◆ project()

    + +
    +
    + + + + + +
    + + + + + + + + +
    static CUTLASS_HOST_DEVICE Coord<3> cutlass::gemm::ProjectOperand< GemmOperand::kD, true >::project (Coord< 3 > const & coord)
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads-members.html new file mode 100644 index 00000000..9d8d3d8a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >, including all inherited members.

    + + +
    Threads typedefcutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads.html b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads.html new file mode 100644 index 00000000..e4f6d4b5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    + + + + +

    +Public Types

    typedef Threads_ Threads
     
    +

    Member Typedef Documentation

    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Tile_, typename Threads_, bool = (Tile_::kW < Threads_::kW)>
    + + + + +
    typedef Threads_ cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >::Threads
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4-members.html new file mode 100644 index 00000000..722512f7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ReshapeThreads< Tile_, Threads_, true > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ReshapeThreads< Tile_, Threads_, true >, including all inherited members.

    + + +
    Threads typedefcutlass::gemm::ReshapeThreads< Tile_, Threads_, true >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html new file mode 100644 index 00000000..ccfacd4f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::ReshapeThreads< Tile_, Threads_, true > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ReshapeThreads< Tile_, Threads_, true > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    + + + + +

    +Public Types

    typedef Shape< Threads_::kD, Threads_::kH *Threads_::kW/Tile_::kW, Tile_::kW, 1 > Threads
     
    +

    Member Typedef Documentation

    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Tile_ , typename Threads_ >
    + + + + +
    typedef Shape<Threads_::kD, Threads_::kH * Threads_::kW / Tile_::kW, Tile_::kW, 1> cutlass::gemm::ReshapeThreads< Tile_, Threads_, true >::Threads
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig-members.html new file mode 100644 index 00000000..1cd9d193 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    InstructionShape typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kStagescutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kThreadscutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kWarpSizecutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    OutputTile typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    ScalarA typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    ScalarB typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    ScalarC typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    ScalarD typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    Warps typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.html b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.html new file mode 100644 index 00000000..41649897 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.html @@ -0,0 +1,177 @@ + + + + + + + +Cutlass: cutlass::gemm::SgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference
    +
    +
    + +

    #include <sgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::SgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >:
    +
    +
    + + +cutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    typedef float ScalarA
     The scalar for A. More...
     
    typedef float ScalarB
     The scalar for B. More...
     
    typedef float ScalarC
     The scalar for C. More...
     
    typedef float ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float > MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    static int const kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads
     The number of threads. More...
     
    static int const kScalarsPerLdgA
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA
     
    static int const kScalarsPerLdsA
     
    static int const kScalarsPerLdgB
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB
     
    static int const kScalarsPerLdsB
     
    static int const kScalarsPerLdgC
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD
     
    static int const kScalarsPerLdsD
     
    static int const kAccumulatorsPerLdsA
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB
     
    static int const kStages
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.png b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.png new file mode 100644 index 00000000..5690d082 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits-members.html new file mode 100644 index 00000000..ad7350b7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits-members.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Epilogue typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GemmConfig typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Index typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    kLayoutAcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    kLayoutBcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    OutputTile typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarC typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarD typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    shared_load_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    shared_store_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.html new file mode 100644 index 00000000..87da2843 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: cutlass::gemm::SgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Struct Template Reference
    +
    +
    + +

    #include <sgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::SgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ >:
    +
    +
    + + +cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, GemmEpilogue< GemmEpilogueTraits_ >, Index_ > +cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    typedef GemmConfig_ GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA ::Scalar ScalarA
     The scalar for A. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB ::Scalar ScalarB
     The scalar for B. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef GemmEpilogue< GemmEpilogueTraits_ > Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef IdentityBlockSwizzle BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef ClearAccumulators< GemmConfig_::Accumulators::Element > ClearAccumulators
     Clear the accumulators. More...
     
    - Static Public Member Functions inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static MatrixLayout::Kind const kLayoutA
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB
     The layout of B. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.png new file mode 100644 index 00000000..3e5427bc Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream-members.html new file mode 100644 index 00000000..115015d0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream-members.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >, including all inherited members.

    + + + + + + + + + + + + + + + + +
    commit(FetchedFragment &fetched, TransformedFragment &transformed) cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > inline
    copy(FetchedFragment &fetched) cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > inline
    copy(int d, FetchedFragment &fetched) cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > inline
    FetchedFragment typedef cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    Fragment typedef cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    inc_stage() cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > inline
    initialize(Params const &params, SharedStorage &shared_storage) cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > inline
    Iterator typedef cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    iterator cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    SharedLoadStream() cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > inline
    SharedLoadStream(Params const &params, SharedStorage &shared_storage) cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > inline
    SharedStorage typedef cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    TransformedFragment typedef cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    Transformer typedef cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    transformer cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream.html new file mode 100644 index 00000000..4e2badd0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream.html @@ -0,0 +1,526 @@ + + + + + + + +Cutlass: cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_stream.h>

    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Iterator_ Iterator
     The load iterator. More...
     
    typedef Transformer_ Transformer
     The transformer. More...
     
    typedef Iterator::Fragment FetchedFragment
     The fragment that is copied from shared memory. More...
     
    typedef Transformer::OutputFragment TransformedFragment
     The fragment that is obtained after the transformation by the transformer. More...
     
    typedef TransformedFragment Fragment
     Make sure the fragments match. More...
     
    typedef Iterator::Storage SharedStorage
     The storage in shared memory needed by that stream. More...
     
    + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE SharedLoadStream ()
     Ctor. More...
     
    CUTLASS_DEVICE SharedLoadStream (Params const &params, SharedStorage &shared_storage)
     Ctor. More...
     
    CUTLASS_DEVICE void initialize (Params const &params, SharedStorage &shared_storage)
     Initialize the stream. More...
     
    CUTLASS_DEVICE void copy (FetchedFragment &fetched)
     Load the data from shared memory to the fetch fragment. More...
     
    CUTLASS_DEVICE void copy (int d, FetchedFragment &fetched)
     Load the data from shared memory to the fetch fragment. More...
     
    CUTLASS_DEVICE void commit (FetchedFragment &fetched, TransformedFragment &transformed)
     Commit the data. More...
     
    CUTLASS_DEVICE void inc_stage ()
     Increment the stage. More...
     
    + + + + + + + +

    +Public Attributes

    Iterator iterator
     The iterator. More...
     
    Transformer transformer
     The transformer. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ FetchedFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Fragment cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::FetchedFragment
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef TransformedFragment cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Fragment
    +
    +

    The output fragment.

    + +
    +
    + +

    ◆ Iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator_ cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Iterator
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Storage cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ TransformedFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer::OutputFragment cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::TransformedFragment
    +
    + +
    +
    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer_ cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Transformer
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ SharedLoadStream() [1/2]

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::SharedLoadStream ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ SharedLoadStream() [2/2]

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::SharedLoadStream (Params const & params,
    SharedStorage & shared_storage
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::commit (FetchedFragment & fetched,
    TransformedFragment & transformed
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy() [1/2]

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::copy (FetchedFragment & fetched)
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy() [2/2]

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::copy (int d,
    FetchedFragment & fetched
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_stage()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::inc_stage ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::initialize (Params const & params,
    SharedStorage & shared_storage
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::iterator
    +
    + +
    +
    + +

    ◆ transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Transformer cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params-members.html new file mode 100644 index 00000000..f947377b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html new file mode 100644 index 00000000..84b8e79d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html @@ -0,0 +1,157 @@ + + + + + + + +Cutlass: cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_shared_stream.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize ()
     Setup the params. More...
     
    + + + + +

    +Public Attributes

    Iterator::Params iterator
     The iterator params. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params::initialize ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator::Params cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params::iterator
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream-members.html new file mode 100644 index 00000000..6d9707a2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream-members.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream.html new file mode 100644 index 00000000..ed5b6c40 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream.html @@ -0,0 +1,405 @@ + + + + + + + +Cutlass: cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_stream.h>

    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Iterator_ Iterator
     The store iterator. More...
     
    typedef Transformer_ Transformer
     
    typedef Transformer::InputFragment InputFragment
     The input fragment. More...
     
    typedef Transformer::OutputFragment TransformedFragment
     The fragment that is obtained after the transformation by the transformer. More...
     
    typedef InputFragment Fragment
     Make sure the fragments match. More...
     
    typedef Iterator::Storage SharedStorage
     The storage in shared memory needed by that stream. More...
     
    + + + + + + + + + + + +

    +Public Member Functions

    template<typename Fragment_ >
    CUTLASS_DEVICE SharedStoreStream (Params const &params, SharedStorage &shared_storage, Fragment_ const &fragment, int offset=0)
     Ctor. More...
     
    CUTLASS_DEVICE void copy ()
     Trigger the copy from the fragment to shared memory. More...
     
    CUTLASS_DEVICE void commit ()
     Commit the data. More...
     
    + + + + + + + + + + +

    +Public Attributes

    Iterator iterator
     The iterator. More...
     
    Transformer transformer
     The transformer. More...
     
    TransformedFragment transformed_fragment
     The fragment containing the transformed data before the copy into shared memory. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef InputFragment cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Fragment
    +
    +

    The input fragment.

    + +
    +
    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer::InputFragment cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::InputFragment
    +
    + +
    +
    + +

    ◆ Iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator_ cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Iterator
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Storage cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ TransformedFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer::OutputFragment cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::TransformedFragment
    +
    + +
    +
    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer_ cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Transformer
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ SharedStoreStream()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::SharedStoreStream (Params const & params,
    SharedStorage & shared_storage,
    Fragment_ const & fragment,
    int offset = 0 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::copy ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::iterator
    +
    + +
    +
    + +

    ◆ transformed_fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    TransformedFragment cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::transformed_fragment
    +
    + +
    +
    + +

    ◆ transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Transformer cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params-members.html new file mode 100644 index 00000000..6d6c5b23 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params.html new file mode 100644 index 00000000..78b06266 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params.html @@ -0,0 +1,157 @@ + + + + + + + +Cutlass: cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_shared_stream.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize ()
     Setup the params. More...
     
    + + + + +

    +Public Attributes

    Iterator::Params iterator
     The iterator params. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Params::initialize ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator::Params cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Params::iterator
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits-members.html new file mode 100644 index 00000000..43845f4d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Delta typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Functor typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Index typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Iterations typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    OutputTile typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Scalar typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    ScalarC typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    ScalarD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.html new file mode 100644 index 00000000..f3896306 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.html @@ -0,0 +1,155 @@ + + + + + + + +Cutlass: cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_epilogue_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ >:
    +
    +
    + + +cutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    typedef GemmConfig_::OutputTile OutputTile
     The output tile. More...
     
    typedef GemmConfig_::Accumulators Accumulators
     
    typedef Helper_::GlobalLoadIteratorC GlobalLoadIteratorC
     The iterator for C in global memory. More...
     
    typedef Helper_::GlobalTransformerC GlobalTransformerC
     The transformer for C. More...
     
    typedef Helper_::GlobalTransformerD GlobalTransformerD
     The transformer for D. More...
     
    typedef Helper_::GlobalStoreIteratorD GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef Helper_::SharedStoreIteratorD SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef Helper_::SharedStoreTransformerD SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef Helper_::SharedLoadIteratorD SharedLoadIteratorD
     The iterator to load D from shared memory. More...
     
    typedef Helper_::Iterations Iterations
     typedef typename GemmConfig::EpilogueIterations Iterations; More...
     
    typedef Helper_::Delta Delta
     The iterations strides. More...
     
    typedef EpilogueFunctor_ Functor
     The functor in charge of the math. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef Functor::Scalar Scalar
     We do not support 3D or 4D shapes. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.png new file mode 100644 index 00000000..eaded28c Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits-members.html new file mode 100644 index 00000000..eb82c015 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits-members.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Epilogue typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GemmConfig typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Index typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    kLayoutAcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    kLayoutBcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    OutputTile typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarA typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarB typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarC typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarD typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    shared_load_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    shared_store_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html new file mode 100644 index 00000000..9a2328d5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html @@ -0,0 +1,172 @@ + + + + + + + +Cutlass: cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ >:
    +
    +
    + + +cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    typedef GemmConfig_ GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef Helper_::GlobalLoadStreamA GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamA ::Scalar ScalarA
     The scalar for A. More...
     
    typedef Helper_::GlobalLoadStreamB GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamB ::Scalar ScalarB
     The scalar for B. More...
     
    typedef Helper_::SharedLoadStreamA SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef Helper_::SharedLoadStreamB SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef Epilogue_ Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef IdentityBlockSwizzle BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef ClearAccumulators< GemmConfig_::Accumulators::Element > ClearAccumulators
     Clear the accumulators. More...
     
    - Static Public Member Functions inherited from cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static MatrixLayout::Kind const kLayoutA
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB
     The layout of B. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.png new file mode 100644 index 00000000..3686ced6 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper-members.html new file mode 100644 index 00000000..65e20abd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper-members.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + +
    GlobalLoadIteratorA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    GlobalLoadIteratorB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    GlobalLoadStreamA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    GlobalLoadStreamB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    GlobalTransformerA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    GlobalTransformerB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedLoadIteratorA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedLoadIteratorB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedLoadStreamA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedLoadStreamB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedStoreIteratorA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedStoreIteratorB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html new file mode 100644 index 00000000..6b3b0494 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html @@ -0,0 +1,328 @@ + + + + + + + +Cutlass: cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperA_::GlobalTileTraits, Index_ > GlobalLoadIteratorA
     The global iterator to load A from global memory. More...
     
    typedef Copy< typename GlobalLoadIteratorA::Fragment > GlobalTransformerA
     The data converter for A before storing to shared memory. More...
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperA_::SharedStoreTileTraits, typename GemmTileTraitsHelperA_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
     The iterator to store A to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperB_::GlobalTileTraits, Index_ > GlobalLoadIteratorB
     The global iterator to load B from global memory. More...
     
    typedef Copy< typename GlobalLoadIteratorB::Fragment > GlobalTransformerB
     The data converter for B before storing to shared memory. More...
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperB_::SharedStoreTileTraits, typename GemmTileTraitsHelperB_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
     The iterator to store B to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperA_::SharedLoadTileTraits, typename GemmTileTraitsHelperA_::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
     The iterator to load A from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorA > SharedLoadStreamA
     The stream to load A from shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperB_::SharedLoadTileTraits, typename GemmTileTraitsHelperB_::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
     The iterator to load B from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorB > SharedLoadStreamB
     The stream to load B from shared memory. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ GlobalLoadIteratorA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperA_::GlobalTileTraits, Index_> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalLoadIteratorA
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperB_::GlobalTileTraits, Index_> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalLoadIteratorB
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalLoadStreamA
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalLoadStreamB
    +
    + +
    +
    + +

    ◆ GlobalTransformerA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef Copy<typename GlobalLoadIteratorA::Fragment> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalTransformerA
    +
    + +
    +
    + +

    ◆ GlobalTransformerB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef Copy<typename GlobalLoadIteratorB::Fragment> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalTransformerB
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperA_::SharedLoadTileTraits, typename GemmTileTraitsHelperA_::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedLoadIteratorA
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperB_::SharedLoadTileTraits, typename GemmTileTraitsHelperB_::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedLoadIteratorB
    +
    + +
    +
    + +

    ◆ SharedLoadStreamA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorA> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedLoadStreamA
    +
    + +
    +
    + +

    ◆ SharedLoadStreamB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorB> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedLoadStreamB
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperA_::SharedStoreTileTraits, typename GemmTileTraitsHelperA_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedStoreIteratorA
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperB_::SharedStoreTileTraits, typename GemmTileTraitsHelperB_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedStoreIteratorB
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd-members.html new file mode 100644 index 00000000..fd4bda02 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd-members.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >, including all inherited members.

    + + + + + + + + + + + + + +
    Accumulators typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    AccumulatorsPerThread typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    AccumulatorsPerWarp typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    FragmentA typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    FragmentB typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    InstructionShape typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d) cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ > inline
    ScalarA typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    ScalarB typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    ScalarC typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    ThreadMultiplyAdd() cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ > inline
    ThreadsPerWarp typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html new file mode 100644 index 00000000..2fcd68bd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html @@ -0,0 +1,382 @@ + + + + + + + +Cutlass: cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ > Struct Template Reference
    +
    +
    + +

    Template performing matrix multiply-add operation within a thread. +

    + +

    #include <thread_multiply_add.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Shape< 1, 1, 1, 1 > InstructionShape
     The shape of the instruction. More...
     
    typedef AccumulatorsPerThread_ AccumulatorsPerThread
     The number of accumulators per thread. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The number of threads per warp. More...
     
    typedef ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef ScalarA_ ScalarA
     The type for A. More...
     
    typedef Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
     The fragment for A. More...
     
    typedef ScalarB_ ScalarB
     The type for B. More...
     
    typedef Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
     The fragment for B. More...
     
    typedef ScalarC_ ScalarC
     The type for C and D. More...
     
    typedef Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW, 16 > Accumulators
     The accumulators. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE ThreadMultiplyAdd ()
     Ctor. More...
     
    CUTLASS_DEVICE void multiply_add (FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
     Multiply : d = a*b + c. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef Fragment<ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW, 16> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::Accumulators
    +
    + +
    +
    + +

    ◆ AccumulatorsPerThread

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef AccumulatorsPerThread_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::AccumulatorsPerThread
    +
    + +
    +
    + +

    ◆ AccumulatorsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef ShapeMul<AccumulatorsPerThread, ThreadsPerWarp>::Shape cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::AccumulatorsPerWarp
    +
    + +
    +
    + +

    ◆ FragmentA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef Fragment<ScalarA, AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::FragmentA
    +
    + +
    +
    + +

    ◆ FragmentB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef Fragment<ScalarB, AccumulatorsPerThread::kH> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::FragmentB
    +
    + +
    +
    + +

    ◆ InstructionShape

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef Shape<1, 1, 1, 1> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::InstructionShape
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef ScalarA_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef ScalarB_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef ScalarC_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ThreadsPerWarp
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ ThreadMultiplyAdd()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ThreadMultiplyAdd ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::multiply_add (FragmentA const & a,
    FragmentB const & b,
    Accumulators const & c,
    Accumulators & d
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_0179827d5e1abec446b31df6ae50a9c4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_0179827d5e1abec446b31df6ae50a9c4.html new file mode 100644 index 00000000..ddea01f4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_0179827d5e1abec446b31df6ae50a9c4.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >, including all inherited members.

    + + + + + + + + + + + + + +
    Accumulators typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    AccumulatorsPerThread typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    AccumulatorsPerWarp typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    FragmentA typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    FragmentB typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    InstructionShape typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d) cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > inline
    ScalarA typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    ScalarB typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    ScalarC typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    ThreadMultiplyAdd() cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > inline
    ThreadsPerWarp typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html new file mode 100644 index 00000000..2d62b3e2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html @@ -0,0 +1,383 @@ + + + + + + + +Cutlass: cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > Struct Template Reference
    +
    +
    + +

    Template performing matrix multiply-add operation within a thread. +

    + +

    #include <hgemm_multiply_add.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Shape< 1, 1, 2, 1 > InstructionShape
     The shape of the instruction. More...
     
    typedef AccumulatorsPerThread_ AccumulatorsPerThread
     The number of accumulators per thread. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The number of threads per warp. More...
     
    typedef ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef half ScalarA
     The type for A. More...
     
    typedef Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
     The fragment for A. More...
     
    typedef half ScalarB
     The type for B. More...
     
    typedef Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
     The fragment for B. More...
     
    typedef half ScalarC
     The type for C and D. More...
     
    typedef Fragment< half, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
     The accumulators. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE ThreadMultiplyAdd ()
     Ctor. Make sure there's an even number of elements in both dimensions. More...
     
    CUTLASS_DEVICE void multiply_add (FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
     Multiply : d = a*b + c. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<half, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::Accumulators
    +
    + +
    +
    + +

    ◆ AccumulatorsPerThread

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef AccumulatorsPerThread_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::AccumulatorsPerThread
    +
    + +
    +
    + +

    ◆ AccumulatorsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef ShapeMul<AccumulatorsPerThread, ThreadsPerWarp>::Shape cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::AccumulatorsPerWarp
    +
    + +
    +
    + +

    ◆ FragmentA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<ScalarA, AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentA
    +
    + +
    +
    + +

    ◆ FragmentB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<ScalarB, AccumulatorsPerThread::kH> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentB
    +
    + +
    +
    + +

    ◆ InstructionShape

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Shape<1, 1, 2, 1> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::InstructionShape
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef half cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef half cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef half cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarC
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadsPerWarp
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ ThreadMultiplyAdd()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadMultiplyAdd ()
    +
    +inline
    +
    +

    Ctor.

    + +
    +
    +

    Member Function Documentation

    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::multiply_add (FragmentA const & a,
    FragmentB const & b,
    Accumulators const & c,
    Accumulators & d
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_ea75a025471611dd709d5f2a07d1bc06.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_ea75a025471611dd709d5f2a07d1bc06.html new file mode 100644 index 00000000..16dd1079 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_ea75a025471611dd709d5f2a07d1bc06.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >, including all inherited members.

    + + + + + + + + + + + + + +
    Accumulators typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    AccumulatorsPerThread typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    AccumulatorsPerWarp typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    FragmentA typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    FragmentB typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    InstructionShape typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d) cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > inline
    ScalarA typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    ScalarB typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    ScalarC typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    ThreadMultiplyAdd() cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > inline
    ThreadsPerWarp typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html new file mode 100644 index 00000000..d358dd3b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html @@ -0,0 +1,382 @@ + + + + + + + +Cutlass: cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > Struct Template Reference
    +
    +
    + +

    Template performing matrix multiply-add operation within a thread. +

    + +

    #include <igemm_multiply_add.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Shape< 4, 1, 1 > InstructionShape
     The shape of the instruction. More...
     
    typedef AccumulatorsPerThread_ AccumulatorsPerThread
     The number of accumulators per thread. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The number of threads per warp. More...
     
    typedef ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef int8_t ScalarA
     The type for A. More...
     
    typedef Fragment< ScalarA, AccumulatorsPerThread::kW *4 > FragmentA
     The fragment for A. More...
     
    typedef int8_t ScalarB
     The type for B. More...
     
    typedef Fragment< ScalarB, AccumulatorsPerThread::kH *4 > FragmentB
     The fragment for B. More...
     
    typedef int ScalarC
     The type for C and D. More...
     
    typedef Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
     The accumulators. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE ThreadMultiplyAdd ()
     Ctor. More...
     
    CUTLASS_DEVICE void multiply_add (FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
     Multiply : d = a*b + c. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::Accumulators
    +
    + +
    +
    + +

    ◆ AccumulatorsPerThread

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef AccumulatorsPerThread_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerThread
    +
    + +
    +
    + +

    ◆ AccumulatorsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef ShapeMul<AccumulatorsPerThread, ThreadsPerWarp>::Shape cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerWarp
    +
    + +
    +
    + +

    ◆ FragmentA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<ScalarA, AccumulatorsPerThread::kW * 4> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentA
    +
    + +
    +
    + +

    ◆ FragmentB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<ScalarB, AccumulatorsPerThread::kH * 4> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentB
    +
    + +
    +
    + +

    ◆ InstructionShape

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Shape<4, 1, 1> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::InstructionShape
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef int8_t cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef int8_t cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef int cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarC
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadsPerWarp
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ ThreadMultiplyAdd()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadMultiplyAdd ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::multiply_add (FragmentA const & a,
    FragmentB const & b,
    Accumulators const & c,
    Accumulators & d
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd-members.html new file mode 100644 index 00000000..633e1cdc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd-members.html @@ -0,0 +1,131 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Base typedefcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    data()cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >inline
    data() constcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >inline
    Delta typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Fragment typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentConstIterator typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentElement typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentIterator typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentShape typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    ImmediateOffsetStrides typedefcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    inc_advance()cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_c()cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_d()cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_h()cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_w()cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >inline
    Index typedefcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >inlinestatic
    Iterations typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    kAccessSizecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kAdvancecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kFragmentSizecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kIteratorFragmentcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kLayoutcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >static
    kMemorySpacecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    paramscutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    Pointer typedefcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    predicatescutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    PredicateVector typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Scalar typedefcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    Skew typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Storage typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    This_ typedefcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    thread_offsetcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    ThreadOffset typedefcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    Threads typedefcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    Tile typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Traits typedefcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    valid(int d, int h, int w, int c) constcutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >inline
    WmmaGemmGlobalIteratorCd()cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >inline
    WmmaGemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int const pointer_offset=0, int const pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html new file mode 100644 index 00000000..e9c16077 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html @@ -0,0 +1,820 @@ + + + + + + + +Cutlass: cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <wmma_gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >:
    +
    +
    + + +cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > + +
    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > This_
     This class. More...
     
    typedef TileTraits_ Traits
     The traits. More...
     
    typedef TileIteratorBase< Traits, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > Base
     The base class. More...
     
    typedef Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > ImmediateOffsetStrides
     Override the strides in each dimension between different loads/stores. More...
     
    typedef TileTraits_::Scalar Scalar
     The scalar. More...
     
    typedef TileTraits_::Pointer Pointer
     The pointer. More...
     
    typedef TileTraits_::Threads Threads
     The threads. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef TileTraits_::ThreadOffset ThreadOffset
     The thread offset functor. More...
     
    - Public Types inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    typedef TileTraits_ Traits
     concept TileTraits More...
     
    typedef TileTraits_::Scalar Scalar
     Scalar element. More...
     
    typedef TileTraits_::Scalar FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Shape< 0, 0, 0, 0 > Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/store by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE WmmaGemmGlobalIteratorCd ()
     Ctor. More...
     
    CUTLASS_DEVICE WmmaGemmGlobalIteratorCd (Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int const pointer_offset=0, int const pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())
     Ctor. More...
     
    CUTLASS_DEVICE void inc_c ()
     Increment the pointer in the C dimension. More...
     
    CUTLASS_DEVICE void inc_w ()
     Increment the pointer in the W dimension. More...
     
    CUTLASS_DEVICE void inc_h ()
     Increment the pointer in the H dimension. More...
     
    CUTLASS_DEVICE void inc_d ()
     Increment the pointer in the D dimension. More...
     
    CUTLASS_DEVICE void inc_advance ()
     Increment the pointer to move to the next iteration. More...
     
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Test the predicate. More...
     
    CUTLASS_HOST_DEVICE Pointer data ()
     Returns the raw pointer. More...
     
    CUTLASS_HOST_DEVICE Pointer const data () const
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + + + + +

    +Public Attributes

    Params params
     
    Coord< 4 > thread_offset
     
    cutlass::PredicateVector< Base::Iterations::kW > predicates
     The predicates for the row. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = TileTraits_::kLayout
     The layout. More...
     
    - Static Public Attributes inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    static IteratorAdvance::Kind const kAdvance
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment
     Specifies iterator storage fragment type (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    + + + + + +

    +Additional Inherited Members

    - Static Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileIteratorBase<Traits, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_> cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Base
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Index_ cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Index
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Pointer cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Scalar cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Scalar
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef WmmaGemmGlobalIteratorCd<TileTraits_, Index_> cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::This_
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::ThreadOffset cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Threads cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Threads
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_ cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ WmmaGemmGlobalIteratorCd() [1/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::WmmaGemmGlobalIteratorCd ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ WmmaGemmGlobalIteratorCd() [2/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::WmmaGemmGlobalIteratorCd (Params const & params,
    const Coord< 3 > & bounds,
    const Coord< 3 > & block,
    int const pointer_offset = 0,
    int const pred_offset = 0,
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data() [1/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Pointer cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::data ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ data() [2/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Pointer const cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_advance()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::inc_advance ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_c()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::inc_c ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_d()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::inc_d ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_h()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::inc_h ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_w()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::inc_w ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::kLayout = TileTraits_::kLayout
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Params cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::params
    +
    + +
    +
    + +

    ◆ predicates

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    cutlass::PredicateVector<Base::Iterations::kW> cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::predicates
    +
    + +
    +
    + +

    ◆ thread_offset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Coord<4> cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::thread_offset
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.png b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.png new file mode 100644 index 00000000..f5ad5c7a Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits-members.html new file mode 100644 index 00000000..11f86143 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + + +
    Base typedefcutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >
    Delta typedefcutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Iterations typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    kAccessSizecutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kLayoutcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kMemorySpacecutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kOperandcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >static
    MultiplicandTraits typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    ThreadsDelta typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Tile typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html new file mode 100644 index 00000000..d327b3c1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html @@ -0,0 +1,197 @@ + + + + + + + +Cutlass: cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <wmma_gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > Base
     The base class. More...
     
    typedef Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > Delta
     Override the strides in each dimension between different loads/stores. More...
     
    - Public Types inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile shape. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads shape. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsDelta
     The relative offset between two elements in the H/W dimension in adjacent threads. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSize > ImmediateOffsetStrides
     Strides for immediate offset computation. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
     
    + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Static Public Attributes inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    static GemmOperand::Kind const kOperand
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout
     The layout. More...
     
    static int const kAccessSize
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef GemmGlobalTileTraits<GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_> cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::Delta
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.png new file mode 100644 index 00000000..4c15d9dd Binary files /dev/null and b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.png differ diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html new file mode 100644 index 00000000..383587f8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html new file mode 100644 index 00000000..0b836280 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <wmma_gemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params-members.html new file mode 100644 index 00000000..9f72d003 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html new file mode 100644 index 00000000..32bf2ce8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html @@ -0,0 +1,298 @@ + + + + + + + +Cutlass: cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <wmma_gemm_global_tile.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Pointer pointer, Index ld, Index n, Index epilogue_stride_w, Index epilogue_delta_w)
     Setup the params. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Pointer pointer
     The pointer. More...
     
    Index stride_h
     The stride in the H dimension to setup the thread in the block. More...
     
    Index inc_h
     The strides to increment the pointer. More...
     
    Index inc_advance
     
    Index predicate_offset
     The column offset to compute the predicate for the columns. More...
     
    Index predicate_inc_h
     The strides to increment the predicate offset. More...
     
    Index predicate_inc_advance
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::initialize (Pointer pointer,
    Index ld,
    Index n,
    Index epilogue_stride_w,
    Index epilogue_delta_w 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ inc_advance

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::inc_advance
    +
    + +
    +
    + +

    ◆ inc_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::inc_h
    +
    + +
    +
    + +

    ◆ pointer

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Pointer cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::pointer
    +
    + +
    +
    + +

    ◆ predicate_inc_advance

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_inc_advance
    +
    + +
    +
    + +

    ◆ predicate_inc_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_inc_h
    +
    + +
    +
    + +

    ◆ predicate_offset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_offset
    +
    + +
    +
    + +

    ◆ stride_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::stride_h
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1SharedStorage.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1SharedStorage.html new file mode 100644 index 00000000..78f13205 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1SharedStorage.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::SharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::SharedStorage Struct Reference
    +
    +
    + +

    The shared memory storage needed by the iterator. +

    + +

    #include <wmma_gemm_global_tile.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1is__pow2-members.html b/docs/generated-html/structcutlass_1_1is__pow2-members.html new file mode 100644 index 00000000..8ec01315 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1is__pow2-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::is_pow2< N > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1is__pow2.html b/docs/generated-html/structcutlass_1_1is__pow2.html new file mode 100644 index 00000000..01c0ea16 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1is__pow2.html @@ -0,0 +1,125 @@ + + + + + + + +Cutlass: cutlass::is_pow2< N > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::is_pow2< N > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    +
    +Inheritance diagram for cutlass::is_pow2< N >:
    +
    +
    + + +cutlass::platform::integral_constant< bool,(N &(N - 1))==0 > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool,(N &(N - 1))==0 >
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool,(N &(N - 1))==0 >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool,(N &(N - 1))==0 >
    static const bool value
     
    +

    Detailed Description

    +

    template<int N>
    +struct cutlass::is_pow2< N >

    + +

    Statically determine if N is a power-of-two

    +

    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1is__pow2.png b/docs/generated-html/structcutlass_1_1is__pow2.png new file mode 100644 index 00000000..00d3a504 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1is__pow2.png differ diff --git a/docs/generated-html/structcutlass_1_1log2__down-members.html b/docs/generated-html/structcutlass_1_1log2__down-members.html new file mode 100644 index 00000000..e585259d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__down-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::log2_down< N, CurrentVal, Count > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::log2_down< N, CurrentVal, Count >, including all inherited members.

    + + +
    value enum valuecutlass::log2_down< N, CurrentVal, Count >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__down.html b/docs/generated-html/structcutlass_1_1log2__down.html new file mode 100644 index 00000000..bee9f1c8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__down.html @@ -0,0 +1,128 @@ + + + + + + + +Cutlass: cutlass::log2_down< N, CurrentVal, Count > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::log2_down< N, CurrentVal, Count > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + + +

    +Public Types

    enum  { value = log2_down<N, (CurrentVal >> 1), Count + 1>::value + }
     Static logarithm value. More...
     
    +

    Detailed Description

    +

    template<int N, int CurrentVal = N, int Count = 0>
    +struct cutlass::log2_down< N, CurrentVal, Count >

    + +

    Statically determine log2(N), rounded down

    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int N, int CurrentVal = N, int Count = 0>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4-members.html b/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4-members.html new file mode 100644 index 00000000..9e97ad59 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::log2_down< N, 1, Count > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::log2_down< N, 1, Count >, including all inherited members.

    + + +
    value enum valuecutlass::log2_down< N, 1, Count >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html b/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html new file mode 100644 index 00000000..115f5dec --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html @@ -0,0 +1,122 @@ + + + + + + + +Cutlass: cutlass::log2_down< N, 1, Count > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::log2_down< N, 1, Count > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + +

    +Public Types

    enum  { value = Count + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int N, int Count>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__up-members.html b/docs/generated-html/structcutlass_1_1log2__up-members.html new file mode 100644 index 00000000..b402e19a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__up-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::log2_up< N, CurrentVal, Count > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::log2_up< N, CurrentVal, Count >, including all inherited members.

    + + +
    value enum valuecutlass::log2_up< N, CurrentVal, Count >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__up.html b/docs/generated-html/structcutlass_1_1log2__up.html new file mode 100644 index 00000000..76434f76 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__up.html @@ -0,0 +1,128 @@ + + + + + + + +Cutlass: cutlass::log2_up< N, CurrentVal, Count > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::log2_up< N, CurrentVal, Count > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + + +

    +Public Types

    enum  { value = log2_up<N, (CurrentVal >> 1), Count + 1>::value + }
     Static logarithm value. More...
     
    +

    Detailed Description

    +

    template<int N, int CurrentVal = N, int Count = 0>
    +struct cutlass::log2_up< N, CurrentVal, Count >

    + +

    Statically determine log2(N), rounded up

    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int N, int CurrentVal = N, int Count = 0>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4-members.html b/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4-members.html new file mode 100644 index 00000000..43e9b591 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::log2_up< N, 1, Count > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::log2_up< N, 1, Count >, including all inherited members.

    + + +
    value enum valuecutlass::log2_up< N, 1, Count >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html b/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html new file mode 100644 index 00000000..7fffdf1b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html @@ -0,0 +1,122 @@ + + + + + + + +Cutlass: cutlass::log2_up< N, 1, Count > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::log2_up< N, 1, Count > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + +

    +Public Types

    enum  { value = ((1 << Count) < N) ? Count + 1 : Count + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int N, int Count>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1aligned__chunk.html b/docs/generated-html/structcutlass_1_1platform_1_1aligned__chunk.html new file mode 100644 index 00000000..3de8d20b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1aligned__chunk.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::platform::aligned_chunk< Align > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::aligned_chunk< Align > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage-members.html b/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage-members.html new file mode 100644 index 00000000..fd6fe12e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::aligned_storage< Len, Align > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::aligned_storage< Len, Align >, including all inherited members.

    + + +
    type typedefcutlass::platform::aligned_storage< Len, Align >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage.html b/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage.html new file mode 100644 index 00000000..aff24062 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::aligned_storage< Len, Align > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::aligned_storage< Len, Align > Struct Template Reference
    +
    +
    + +

    std::aligned_storage +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef aligned_chunk< Align > type[Len/sizeof(aligned_chunk< Align >)]
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<size_t Len, size_t Align>
    + + + + +
    typedef aligned_chunk<Align> cutlass::platform::aligned_storage< Len, Align >::type[Len/sizeof(aligned_chunk< Align >)]
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of-members.html new file mode 100644 index 00000000..fc7b447c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< value_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< value_t >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of.html new file mode 100644 index 00000000..694be0bd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of.html @@ -0,0 +1,142 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< value_t > Struct Template Reference
    +
    +
    + +

    std::alignment_of +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::alignment_of< value_t >:
    +
    +
    + + +cutlass::platform::alignment_of< const value_t > +cutlass::platform::alignment_of< const volatile value_t > +cutlass::platform::alignment_of< volatile value_t > + +
    + + + + +

    +Classes

    struct  pad
     
    + + + +

    +Public Types

    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of.png b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of.png new file mode 100644 index 00000000..ed715083 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_1_1pad-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_1_1pad-members.html new file mode 100644 index 00000000..acdc5370 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_1_1pad-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< value_t >::pad Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_1_1pad.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_1_1pad.html new file mode 100644 index 00000000..55d1bd6e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_1_1pad.html @@ -0,0 +1,136 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< value_t >::pad Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< value_t >::pad Struct Reference
    +
    +
    + +

    #include <platform.h>

    + + + + + + +

    +Public Attributes

    value_t val
     
    char byte
     
    +

    Member Data Documentation

    + +

    ◆ byte

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    char cutlass::platform::alignment_of< value_t >::pad::byte
    +
    + +
    +
    + +

    ◆ val

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    value_t cutlass::platform::alignment_of< value_t >::pad::val
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4-members.html new file mode 100644 index 00000000..ea64f250 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< const value_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< const value_t >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html new file mode 100644 index 00000000..8e98bc53 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< const value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< const value_t > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::alignment_of< const value_t >:
    +
    +
    + + +cutlass::platform::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.png new file mode 100644 index 00000000..2be14aba Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4-members.html new file mode 100644 index 00000000..a61b3244 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< const volatile value_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< const volatile value_t >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html new file mode 100644 index 00000000..4f8edc6e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< const volatile value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< const volatile value_t > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::alignment_of< const volatile value_t >:
    +
    +
    + + +cutlass::platform::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png new file mode 100644 index 00000000..94c91b65 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4-members.html new file mode 100644 index 00000000..0e3715f6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< double2 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< double2 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< double2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html new file mode 100644 index 00000000..a60e78c5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< double2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< double2 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4-members.html new file mode 100644 index 00000000..8f2714a9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< double4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< double4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< double4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html new file mode 100644 index 00000000..2084602e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< double4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< double4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4-members.html new file mode 100644 index 00000000..3cbf902f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< float4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< float4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< float4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html new file mode 100644 index 00000000..845ab556 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< float4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< float4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4-members.html new file mode 100644 index 00000000..8aa75828 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< int4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< int4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< int4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html new file mode 100644 index 00000000..1d78331a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< int4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< int4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4-members.html new file mode 100644 index 00000000..b788913c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< long4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< long4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< long4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html new file mode 100644 index 00000000..0a6a59b6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< long4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< long4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4-members.html new file mode 100644 index 00000000..ce64ecf4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< longlong2 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< longlong2 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< longlong2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html new file mode 100644 index 00000000..b5d0d214 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< longlong2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< longlong2 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4-members.html new file mode 100644 index 00000000..7bdc4055 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< longlong4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< longlong4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< longlong4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html new file mode 100644 index 00000000..e03232f6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< longlong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< longlong4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4-members.html new file mode 100644 index 00000000..50144350 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< uint4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< uint4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< uint4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html new file mode 100644 index 00000000..45a392e2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< uint4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< uint4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4-members.html new file mode 100644 index 00000000..b18799de --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< ulong4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< ulong4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< ulong4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html new file mode 100644 index 00000000..45ca5ac5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< ulong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< ulong4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4-members.html new file mode 100644 index 00000000..867a1a97 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< ulonglong2 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< ulonglong2 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< ulonglong2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html new file mode 100644 index 00000000..e74e4909 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< ulonglong2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< ulonglong2 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4-members.html new file mode 100644 index 00000000..57209782 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< ulonglong4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< ulonglong4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< ulonglong4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html new file mode 100644 index 00000000..de2f97f0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< ulonglong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< ulonglong4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4-members.html new file mode 100644 index 00000000..93d5c387 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< volatile value_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< volatile value_t >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html new file mode 100644 index 00000000..9ad8844d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< volatile value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< volatile value_t > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::alignment_of< volatile value_t >:
    +
    +
    + + +cutlass::platform::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.png new file mode 100644 index 00000000..331d786e Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1bool__constant-members.html b/docs/generated-html/structcutlass_1_1platform_1_1bool__constant-members.html new file mode 100644 index 00000000..970d8749 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1bool__constant-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::bool_constant< V > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1bool__constant.html b/docs/generated-html/structcutlass_1_1platform_1_1bool__constant.html new file mode 100644 index 00000000..77a7942d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1bool__constant.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::bool_constant< V > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::bool_constant< V > Struct Template Reference
    +
    +
    + +

    std::bool_constant +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::bool_constant< V >:
    +
    +
    + + +cutlass::platform::integral_constant< bool, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool, V >
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool, V >
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1bool__constant.png b/docs/generated-html/structcutlass_1_1platform_1_1bool__constant.png new file mode 100644 index 00000000..0740f5a6 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1bool__constant.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1conditional-members.html b/docs/generated-html/structcutlass_1_1platform_1_1conditional-members.html new file mode 100644 index 00000000..0e246b14 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1conditional-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::conditional< B, T, F > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::conditional< B, T, F >, including all inherited members.

    + + +
    type typedefcutlass::platform::conditional< B, T, F >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1conditional.html b/docs/generated-html/structcutlass_1_1platform_1_1conditional.html new file mode 100644 index 00000000..71ae69b8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1conditional.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::conditional< B, T, F > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::conditional< B, T, F > Struct Template Reference
    +
    +
    + +

    std::conditional (true specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<bool B, class T, class F >
    + + + + +
    typedef T cutlass::platform::conditional< B, T, F >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html new file mode 100644 index 00000000..309d3523 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::conditional< false, T, F > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::conditional< false, T, F >, including all inherited members.

    + + +
    type typedefcutlass::platform::conditional< false, T, F >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html new file mode 100644 index 00000000..4eee2e3e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::conditional< false, T, F > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::conditional< false, T, F > Struct Template Reference
    +
    +
    + +

    std::conditional (false specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef F type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<class T , class F >
    + + + + +
    typedef F cutlass::platform::conditional< false, T, F >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1default__delete-members.html b/docs/generated-html/structcutlass_1_1platform_1_1default__delete-members.html new file mode 100644 index 00000000..0d47203d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1default__delete-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::default_delete< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::default_delete< T >, including all inherited members.

    + + +
    operator()(T *ptr) constcutlass::platform::default_delete< T >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1default__delete.html b/docs/generated-html/structcutlass_1_1platform_1_1default__delete.html new file mode 100644 index 00000000..d15c650d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1default__delete.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::platform::default_delete< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::default_delete< T > Struct Template Reference
    +
    +
    + +

    Default deleter. +

    + +

    #include <platform.h>

    + + + + +

    +Public Member Functions

    void operator() (T *ptr) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + +
    void cutlass::platform::default_delete< T >::operator() (T * ptr) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4-members.html new file mode 100644 index 00000000..2ad2a948 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::default_delete< T[]> Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::default_delete< T[]>, including all inherited members.

    + + +
    operator()(T *ptr) constcutlass::platform::default_delete< T[]>inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html b/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html new file mode 100644 index 00000000..9051d14f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::platform::default_delete< T[]> Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::default_delete< T[]> Struct Template Reference
    +
    +
    + +

    Partial specialization for deleting array types. +

    + +

    #include <platform.h>

    + + + + +

    +Public Member Functions

    void operator() (T *ptr) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + +
    void cutlass::platform::default_delete< T[]>::operator() (T * ptr) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1enable__if-members.html b/docs/generated-html/structcutlass_1_1platform_1_1enable__if-members.html new file mode 100644 index 00000000..cd8ca9d5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1enable__if-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::enable_if< C, T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::enable_if< C, T >, including all inherited members.

    + + +
    type typedefcutlass::platform::enable_if< C, T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1enable__if.html b/docs/generated-html/structcutlass_1_1platform_1_1enable__if.html new file mode 100644 index 00000000..2621d173 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1enable__if.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::enable_if< C, T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::enable_if< C, T > Struct Template Reference
    +
    +
    + +

    std::enable_if (true specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<bool C, typename T = void>
    + + + + +
    typedef T cutlass::platform::enable_if< C, T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html new file mode 100644 index 00000000..22e8b1c9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::platform::enable_if< false, T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::enable_if< false, T > Struct Template Reference
    +
    +
    + +

    std::enable_if (false specialization) +

    + +

    #include <platform.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1greater-members.html b/docs/generated-html/structcutlass_1_1platform_1_1greater-members.html new file mode 100644 index 00000000..4cf39f7c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1greater-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::greater< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::greater< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) constcutlass::platform::greater< T >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1greater.html b/docs/generated-html/structcutlass_1_1platform_1_1greater.html new file mode 100644 index 00000000..aadc82c7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1greater.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: cutlass::platform::greater< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::greater< T > Struct Template Reference
    +
    +
    + +

    std::greater +

    + +

    #include <platform.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexpr bool operator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::greater< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1integral__constant-members.html b/docs/generated-html/structcutlass_1_1platform_1_1integral__constant-members.html new file mode 100644 index 00000000..40aca68b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1integral__constant-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::integral_constant< value_t, V > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1integral__constant.html b/docs/generated-html/structcutlass_1_1platform_1_1integral__constant.html new file mode 100644 index 00000000..db936f3f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1integral__constant.html @@ -0,0 +1,261 @@ + + + + + + + +Cutlass: cutlass::platform::integral_constant< value_t, V > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::integral_constant< value_t, V > Struct Template Reference
    +
    +
    + +

    std::integral_constant +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::integral_constant< value_t, V >:
    +
    +
    + + +cutlass::platform::is_integral< T > +cutlass::platform::is_integral< char > +cutlass::platform::is_integral< int > +cutlass::platform::is_integral< long > +cutlass::platform::is_integral< long long > +cutlass::platform::is_integral< short > +cutlass::platform::is_integral< signed char > +cutlass::platform::is_integral< unsigned char > +cutlass::platform::is_integral< unsigned int > +cutlass::platform::is_integral< unsigned long > +cutlass::platform::is_integral< unsigned long long > +cutlass::platform::is_integral< unsigned short > +cutlass::platform::is_pointer_helper< T > +cutlass::platform::is_pointer_helper< T * > +cutlass::platform::is_same< A, B > +cutlass::platform::is_same< A, A > +cutlass::platform::is_volatile< T > +cutlass::platform::is_volatile< volatile T > +cutlass::platform::is_pointer_helper< remove_cv< T >::type > +cutlass::platform::is_same< void, remove_cv< T >::type > + +
    + + + + + + +

    +Public Types

    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    + + + +

    +Static Public Attributes

    static const value_t value = V
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + +
    typedef integral_constant<value_t, V> cutlass::platform::integral_constant< value_t, V >::type
    +
    + +
    +
    + +

    ◆ value_type

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + +
    typedef value_t cutlass::platform::integral_constant< value_t, V >::value_type
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ operator value_type()

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::platform::integral_constant< value_t, V >::operator value_type () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE const value_type cutlass::platform::integral_constant< value_t, V >::operator() () const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ value

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + +
    const value_t cutlass::platform::integral_constant< value_t, V >::value = V
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1integral__constant.png b/docs/generated-html/structcutlass_1_1platform_1_1integral__constant.png new file mode 100644 index 00000000..08d53740 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1integral__constant.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic-members.html new file mode 100644 index 00000000..1b38edec --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_arithmetic< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.html b/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.html new file mode 100644 index 00000000..a97af770 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_arithmetic< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_arithmetic< T > Struct Template Reference
    +
    +
    + +

    std::is_arithmetic +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_arithmetic< T >:
    +
    +
    + + +cutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.png b/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.png new file mode 100644 index 00000000..40005ff3 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of-members.html new file mode 100644 index 00000000..249c1c91 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_base_of< BaseT, DerivedT > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of.html b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of.html new file mode 100644 index 00000000..dedb3f49 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_base_of< BaseT, DerivedT > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_base_of< BaseT, DerivedT > Struct Template Reference
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of.png b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of.png new file mode 100644 index 00000000..00a50369 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper-members.html new file mode 100644 index 00000000..8b690927 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_base_of_helper< BaseT, DerivedT > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper.html b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper.html new file mode 100644 index 00000000..023363b0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper.html @@ -0,0 +1,264 @@ + + + + + + + +Cutlass: cutlass::platform::is_base_of_helper< BaseT, DerivedT > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_base_of_helper< BaseT, DerivedT > Struct Template Reference
    +
    +
    + +

    Helper for std::is_base_of. +

    + +

    #include <platform.h>

    + + + + +

    +Classes

    struct  dummy
     
    + + + + + +

    +Public Types

    typedef char(& yes)[1]
     
    typedef char(& no)[2]
     
    + + + + + + +

    +Static Public Member Functions

    template<typename T >
    static CUTLASS_HOST_DEVICE yes check (DerivedT *, T)
     
    static CUTLASS_HOST_DEVICE no check (BaseT *, int)
     
    + + + +

    +Static Public Attributes

    static const bool value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes)
     
    +

    Member Typedef Documentation

    + +

    ◆ no

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + +
    typedef char(& cutlass::platform::is_base_of_helper< BaseT, DerivedT >::no)[2]
    +
    + +
    +
    + +

    ◆ yes

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + +
    typedef char(& cutlass::platform::is_base_of_helper< BaseT, DerivedT >::yes)[1]
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ check() [1/2]

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    static CUTLASS_HOST_DEVICE yes cutlass::platform::is_base_of_helper< BaseT, DerivedT >::check (DerivedT * ,
     
    )
    +
    +static
    +
    + +
    +
    + +

    ◆ check() [2/2]

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    static CUTLASS_HOST_DEVICE no cutlass::platform::is_base_of_helper< BaseT, DerivedT >::check (BaseT * ,
    int  
    )
    +
    +static
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ value

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + + +
    + + + + +
    const bool cutlass::platform::is_base_of_helper< BaseT, DerivedT >::value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy-members.html new file mode 100644 index 00000000..681dfbf0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html new file mode 100644 index 00000000..99556de7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html @@ -0,0 +1,146 @@ + + + + + + + +Cutlass: cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE operator B* () const
     
    CUTLASS_HOST_DEVICE operator D* ()
     
    +

    Member Function Documentation

    + +

    ◆ operator B*()

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename B , typename D >
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >::operator B* () const
    +
    + +
    +
    + +

    ◆ operator D*()

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename B , typename D >
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >::operator D* ()
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point-members.html new file mode 100644 index 00000000..d9fc9091 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_floating_point< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.html b/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.html new file mode 100644 index 00000000..a4612f17 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_floating_point< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_floating_point< T > Struct Template Reference
    +
    +
    + +

    std::is_floating_point +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_floating_point< T >:
    +
    +
    + + +cutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.png b/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.png new file mode 100644 index 00000000..f1bc33cd Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental-members.html new file mode 100644 index 00000000..9594895f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_fundamental< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.html b/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.html new file mode 100644 index 00000000..0fbbe471 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_fundamental< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_fundamental< T > Struct Template Reference
    +
    +
    + +

    std::is_fundamental +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_fundamental< T >:
    +
    +
    + + +cutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.png b/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.png new file mode 100644 index 00000000..310dffc3 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral-members.html new file mode 100644 index 00000000..1e8b7096 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral.html new file mode 100644 index 00000000..a8218637 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< T > Struct Template Reference
    +
    +
    + +

    std::is_integral +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< T >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > +cutlass::platform::is_integral< const T > +cutlass::platform::is_integral< const volatile T > +cutlass::platform::is_integral< volatile T > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral.png new file mode 100644 index 00000000..0c646de0 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4-members.html new file mode 100644 index 00000000..6de7dfe3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html new file mode 100644 index 00000000..faa0e6d2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< char > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< char >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.png new file mode 100644 index 00000000..62f9f7b9 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4-members.html new file mode 100644 index 00000000..8f7fc5b4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< const T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html new file mode 100644 index 00000000..769cba51 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< const T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< const T > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< const T >:
    +
    +
    + + +cutlass::platform::is_integral< T > +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.png new file mode 100644 index 00000000..91f226cb Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4-members.html new file mode 100644 index 00000000..435a585b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< const volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html new file mode 100644 index 00000000..44de345c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< const volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< const volatile T > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< const volatile T >:
    +
    +
    + + +cutlass::platform::is_integral< T > +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.png new file mode 100644 index 00000000..c15aa4db Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4-members.html new file mode 100644 index 00000000..28ebf9d1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< int > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html new file mode 100644 index 00000000..fe037e17 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< int > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< int >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.png new file mode 100644 index 00000000..2817eea5 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4-members.html new file mode 100644 index 00000000..679ccf3b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html new file mode 100644 index 00000000..2643071c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< long > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< long >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.png new file mode 100644 index 00000000..01b1f205 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4-members.html new file mode 100644 index 00000000..82a054fa --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< long long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html new file mode 100644 index 00000000..a3f5c11d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< long long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< long long > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< long long >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.png new file mode 100644 index 00000000..f3245da6 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4-members.html new file mode 100644 index 00000000..3dc681ff --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< short > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html new file mode 100644 index 00000000..119f69bf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< short > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< short > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< short >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.png new file mode 100644 index 00000000..964e45f8 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4-members.html new file mode 100644 index 00000000..74a51c68 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< signed char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html new file mode 100644 index 00000000..78ff2a0c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< signed char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< signed char > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< signed char >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.png new file mode 100644 index 00000000..d8ed29c3 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4-members.html new file mode 100644 index 00000000..ce568484 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< unsigned char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html new file mode 100644 index 00000000..eb0734cd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< unsigned char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< unsigned char > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< unsigned char >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.png new file mode 100644 index 00000000..fb350f80 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4-members.html new file mode 100644 index 00000000..e334b6af --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< unsigned int > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html new file mode 100644 index 00000000..669a35f4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< unsigned int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< unsigned int > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< unsigned int >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.png new file mode 100644 index 00000000..62eb1b3a Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4-members.html new file mode 100644 index 00000000..f0de6020 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< unsigned long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html new file mode 100644 index 00000000..57166d8f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< unsigned long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< unsigned long > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< unsigned long >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.png new file mode 100644 index 00000000..e758fe23 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4-members.html new file mode 100644 index 00000000..4c796bf9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< unsigned long long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html new file mode 100644 index 00000000..8fb6640e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< unsigned long long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< unsigned long long > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< unsigned long long >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.png new file mode 100644 index 00000000..3880ac46 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4-members.html new file mode 100644 index 00000000..c7dbea3f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< unsigned short > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html new file mode 100644 index 00000000..3dad4c36 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< unsigned short > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< unsigned short > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< unsigned short >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.png new file mode 100644 index 00000000..213ed400 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4-members.html new file mode 100644 index 00000000..cf6c6e95 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html new file mode 100644 index 00000000..771358c7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< volatile T > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< volatile T >:
    +
    +
    + + +cutlass::platform::is_integral< T > +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.png new file mode 100644 index 00000000..774b26f3 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer-members.html new file mode 100644 index 00000000..7f6e12d6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_pointer< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer.html b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer.html new file mode 100644 index 00000000..f6bd0999 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: cutlass::platform::is_pointer< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_pointer< T > Struct Template Reference
    +
    +
    + +

    std::is_pointer +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_pointer< T >:
    +
    +
    + + +cutlass::platform::is_pointer_helper< remove_cv< T >::type > +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer.png b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer.png new file mode 100644 index 00000000..e83115cf Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper-members.html new file mode 100644 index 00000000..4d36210e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_pointer_helper< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.html b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.html new file mode 100644 index 00000000..56fdd506 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_pointer_helper< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_pointer_helper< T > Struct Template Reference
    +
    +
    + +

    Helper for std::is_pointer (false specialization) +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_pointer_helper< T >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.png b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.png new file mode 100644 index 00000000..bd1fb4bc Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4-members.html new file mode 100644 index 00000000..9a6bacc8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_pointer_helper< T * > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html new file mode 100644 index 00000000..1e1fb5ed --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_pointer_helper< T * > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_pointer_helper< T * > Struct Template Reference
    +
    +
    + +

    Helper for std::is_pointer (true specialization) +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_pointer_helper< T * >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.png new file mode 100644 index 00000000..6e07cf62 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__same-members.html new file mode 100644 index 00000000..3ed687e1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__same-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_same< A, B > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same.html b/docs/generated-html/structcutlass_1_1platform_1_1is__same.html new file mode 100644 index 00000000..bc71a845 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__same.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_same< A, B > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_same< A, B > Struct Template Reference
    +
    +
    + +

    std::is_same (false specialization) +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_same< A, B >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same.png b/docs/generated-html/structcutlass_1_1platform_1_1is__same.png new file mode 100644 index 00000000..66bdead4 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__same.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4-members.html new file mode 100644 index 00000000..0c4aba48 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_same< A, A > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html new file mode 100644 index 00000000..973be030 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_same< A, A > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_same< A, A > Struct Template Reference
    +
    +
    + +

    std::is_same (true specialization) +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_same< A, A >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.png new file mode 100644 index 00000000..cb7e9686 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable-members.html new file mode 100644 index 00000000..1ba94b36 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_trivially_copyable< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.html b/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.html new file mode 100644 index 00000000..f779e4e4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: cutlass::platform::is_trivially_copyable< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_trivially_copyable< T > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_trivially_copyable< T >:
    +
    +
    + + +cutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    static const bool value
     
    +

    Detailed Description

    +

    template<typename T>
    +struct cutlass::platform::is_trivially_copyable< T >

    + +

    std::is_trivially_copyable

    +

    This implementation only evaluates true if T is fundamental or pointer

    +

    Without help from partial template specializations provided by the user for a specific class or struct, this trait will never report that the specified class or struct is trivially-copyable ; this is always safe, if possibly sub-optimal.

    +

    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.png b/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.png new file mode 100644 index 00000000..5103120e Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__void-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__void-members.html new file mode 100644 index 00000000..a04530c0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__void-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_void< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__void.html b/docs/generated-html/structcutlass_1_1platform_1_1is__void.html new file mode 100644 index 00000000..e71b0358 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__void.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: cutlass::platform::is_void< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_void< T > Struct Template Reference
    +
    +
    + +

    std::is_void +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_void< T >:
    +
    +
    + + +cutlass::platform::is_same< void, remove_cv< T >::type > +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__void.png b/docs/generated-html/structcutlass_1_1platform_1_1is__void.png new file mode 100644 index 00000000..20e46d78 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__void.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__volatile-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile-members.html new file mode 100644 index 00000000..95b52e5f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_volatile< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__volatile.html b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile.html new file mode 100644 index 00000000..a75658c0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_volatile< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_volatile< T > Struct Template Reference
    +
    +
    + +

    std::is_volatile +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_volatile< T >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__volatile.png b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile.png new file mode 100644 index 00000000..7a744237 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4-members.html new file mode 100644 index 00000000..bc3af387 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_volatile< volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html new file mode 100644 index 00000000..c2817b5f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_volatile< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_volatile< volatile T > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_volatile< volatile T >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.png new file mode 100644 index 00000000..b86e2a59 Binary files /dev/null and b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.png differ diff --git a/docs/generated-html/structcutlass_1_1platform_1_1less-members.html b/docs/generated-html/structcutlass_1_1platform_1_1less-members.html new file mode 100644 index 00000000..24798c6b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1less-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::less< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::less< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) const | cutlass::platform::less< T > | inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1less.html b/docs/generated-html/structcutlass_1_1platform_1_1less.html new file mode 100644 index 00000000..abaff3e4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1less.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: cutlass::platform::less< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::less< T > Struct Template Reference
    +
    +
    + +

    std::less +

    + +

    #include <platform.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexpr bool operator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::less< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1nullptr__t.html b/docs/generated-html/structcutlass_1_1platform_1_1nullptr__t.html new file mode 100644 index 00000000..c35b9e85 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1nullptr__t.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::platform::nullptr_t Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::nullptr_t Struct Reference
    +
    +
    + +

    std::nullptr_t +

    + +

    #include <platform.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1plus-members.html b/docs/generated-html/structcutlass_1_1platform_1_1plus-members.html new file mode 100644 index 00000000..6055a46c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1plus-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::plus< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::plus< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) const | cutlass::platform::plus< T > | inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1plus.html b/docs/generated-html/structcutlass_1_1platform_1_1plus.html new file mode 100644 index 00000000..71f732c9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1plus.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: cutlass::platform::plus< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::plus< T > Struct Template Reference
    +
    +
    + +

    platform::plus +

    + +

    #include <platform.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexpr T operator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr T cutlass::platform::plus< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__const-members.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__const-members.html new file mode 100644 index 00000000..a67005a9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__const-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::remove_const< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::remove_const< T >, including all inherited members.

    + + +
    type typedef | cutlass::platform::remove_const< T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__const.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__const.html new file mode 100644 index 00000000..d0af5788 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__const.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::remove_const< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::remove_const< T > Struct Template Reference
    +
    +
    + +

    std::remove_const (non-const specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T>
    + + + + +
    typedef T cutlass::platform::remove_const< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4-members.html new file mode 100644 index 00000000..49041398 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::remove_const< const T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::remove_const< const T >, including all inherited members.

    + + +
    type typedef | cutlass::platform::remove_const< const T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html new file mode 100644 index 00000000..a8fff9b4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::remove_const< const T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::remove_const< const T > Struct Template Reference
    +
    +
    + +

    std::remove_const (const specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef T cutlass::platform::remove_const< const T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__cv-members.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__cv-members.html new file mode 100644 index 00000000..64c6607b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__cv-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::remove_cv< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::remove_cv< T >, including all inherited members.

    + + +
    type typedef | cutlass::platform::remove_cv< T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__cv.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__cv.html new file mode 100644 index 00000000..5972cb34 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__cv.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::remove_cv< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::remove_cv< T > Struct Template Reference
    +
    +
    + +

    std::remove_cv +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef remove_volatile< typename remove_const< T >::type >::type type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef remove_volatile<typename remove_const<T>::type>::type cutlass::platform::remove_cv< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile-members.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile-members.html new file mode 100644 index 00000000..19a47545 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::remove_volatile< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::remove_volatile< T >, including all inherited members.

    + + +
    type typedef | cutlass::platform::remove_volatile< T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile.html new file mode 100644 index 00000000..eb259c2e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::remove_volatile< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::remove_volatile< T > Struct Template Reference
    +
    +
    + +

    std::remove_volatile (non-volatile specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T>
    + + + + +
    typedef T cutlass::platform::remove_volatile< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4-members.html new file mode 100644 index 00000000..09e68535 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::remove_volatile< volatile T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::remove_volatile< volatile T >, including all inherited members.

    + + +
    type typedef | cutlass::platform::remove_volatile< volatile T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html new file mode 100644 index 00000000..d2a95b21 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::remove_volatile< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::remove_volatile< volatile T > Struct Template Reference
    +
    +
    + +

    std::remove_volatile (volatile specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef T cutlass::platform::remove_volatile< volatile T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1sqrt__est-members.html b/docs/generated-html/structcutlass_1_1sqrt__est-members.html new file mode 100644 index 00000000..56f16140 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1sqrt__est-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::sqrt_est< N > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::sqrt_est< N >, including all inherited members.

    + + +
    value enum value | cutlass::sqrt_est< N >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1sqrt__est.html b/docs/generated-html/structcutlass_1_1sqrt__est.html new file mode 100644 index 00000000..c973ff72 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1sqrt__est.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: cutlass::sqrt_est< N > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::sqrt_est< N > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + +

    +Public Types

    enum  { value = 1 << (log2_up<N>::value / 2) + }
     
    +

    Detailed Description

    +

    template<int N>
    +struct cutlass::sqrt_est< N >

    + +

    Statically estimate sqrt(N) to the nearest power-of-two

    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int N>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1aligned__chunk.html b/docs/generated-html/structnv__std_1_1aligned__chunk.html new file mode 100644 index 00000000..07341887 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1aligned__chunk.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: nv_std::aligned_chunk< Align > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::aligned_chunk< Align > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1aligned__storage-members.html b/docs/generated-html/structnv__std_1_1aligned__storage-members.html new file mode 100644 index 00000000..21b81924 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1aligned__storage-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::aligned_storage< Len, Align > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::aligned_storage< Len, Align >, including all inherited members.

    + + +
    type typedef | nv_std::aligned_storage< Len, Align >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1aligned__storage.html b/docs/generated-html/structnv__std_1_1aligned__storage.html new file mode 100644 index 00000000..2d99523b --- /dev/null +++ b/docs/generated-html/structnv__std_1_1aligned__storage.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::aligned_storage< Len, Align > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::aligned_storage< Len, Align > Struct Template Reference
    +
    +
    + +

    std::aligned_storage +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef aligned_chunk< Align > type[Len/sizeof(aligned_chunk< Align >)]
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<size_t Len, size_t Align>
    + + + + +
    typedef aligned_chunk<Align> nv_std::aligned_storage< Len, Align >::type[Len/sizeof(aligned_chunk< Align >)]
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of-members.html b/docs/generated-html/structnv__std_1_1alignment__of-members.html new file mode 100644 index 00000000..ea6de866 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< value_t > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< value_t >, including all inherited members.

    + + +
    value enum value | nv_std::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of.html b/docs/generated-html/structnv__std_1_1alignment__of.html new file mode 100644 index 00000000..de1689cb --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of.html @@ -0,0 +1,142 @@ + + + + + + + +Cutlass: nv_std::alignment_of< value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< value_t > Struct Template Reference
    +
    +
    + +

    std::alignment_of +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::alignment_of< value_t >:
    +
    +
    + + +nv_std::alignment_of< const value_t > +nv_std::alignment_of< const volatile value_t > +nv_std::alignment_of< volatile value_t > + +
    + + + + +

    +Classes

    struct  pad
     
    + + + +

    +Public Types

    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of.png b/docs/generated-html/structnv__std_1_1alignment__of.png new file mode 100644 index 00000000..8ee1b829 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1alignment__of.png differ diff --git a/docs/generated-html/structnv__std_1_1alignment__of_1_1pad-members.html b/docs/generated-html/structnv__std_1_1alignment__of_1_1pad-members.html new file mode 100644 index 00000000..b64284ef --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_1_1pad-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< value_t >::pad Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< value_t >::pad, including all inherited members.

    + + + +
    byte | nv_std::alignment_of< value_t >::pad
    val | nv_std::alignment_of< value_t >::pad
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_1_1pad.html b/docs/generated-html/structnv__std_1_1alignment__of_1_1pad.html new file mode 100644 index 00000000..3670e505 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_1_1pad.html @@ -0,0 +1,136 @@ + + + + + + + +Cutlass: nv_std::alignment_of< value_t >::pad Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< value_t >::pad Struct Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + + + +

    +Public Attributes

    value_t val
     
    char byte
     
    +

    Member Data Documentation

    + +

    ◆ byte

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    char nv_std::alignment_of< value_t >::pad::byte
    +
    + +
    +
    + +

    ◆ val

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    value_t nv_std::alignment_of< value_t >::pad::val
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4-members.html new file mode 100644 index 00000000..89d2ce10 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< const value_t > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< const value_t >, including all inherited members.

    + + +
    value enum value | nv_std::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.html new file mode 100644 index 00000000..50c19a3a --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: nv_std::alignment_of< const value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< const value_t > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::alignment_of< const value_t >:
    +
    +
    + + +nv_std::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.png b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.png new file mode 100644 index 00000000..3412a3c4 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4-members.html new file mode 100644 index 00000000..6b9668d7 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< const volatile value_t > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< const volatile value_t >, including all inherited members.

    + + +
    value enum value | nv_std::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html new file mode 100644 index 00000000..167c865f --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: nv_std::alignment_of< const volatile value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< const volatile value_t > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::alignment_of< const volatile value_t >:
    +
    +
    + + +nv_std::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png new file mode 100644 index 00000000..39ebbbbc Binary files /dev/null and b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4-members.html new file mode 100644 index 00000000..8f7bf3db --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< double2 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< double2 >, including all inherited members.

    + + +
    value enum value | nv_std::alignment_of< double2 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4.html new file mode 100644 index 00000000..2fb8e549 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< double2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< double2 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4-members.html new file mode 100644 index 00000000..e92dc0f6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< double4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< double4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< double4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4.html new file mode 100644 index 00000000..ef1634f4 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< double4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< double4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4-members.html new file mode 100644 index 00000000..faaf84c5 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< float4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< float4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< float4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4.html new file mode 100644 index 00000000..8f4dda71 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< float4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< float4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4-members.html new file mode 100644 index 00000000..84fb87dd --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< int4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< int4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< int4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4.html new file mode 100644 index 00000000..f4319fff --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< int4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< int4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4-members.html new file mode 100644 index 00000000..65343cb7 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< long4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< long4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< long4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4.html new file mode 100644 index 00000000..b46cf676 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< long4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< long4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4-members.html new file mode 100644 index 00000000..dadea6bc --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< longlong2 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< longlong2 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< longlong2 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4.html new file mode 100644 index 00000000..0ab4ebe8 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< longlong2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< longlong2 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4-members.html new file mode 100644 index 00000000..40159270 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< longlong4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< longlong4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< longlong4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4.html new file mode 100644 index 00000000..02995dc6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< longlong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< longlong4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4-members.html new file mode 100644 index 00000000..7f1d7cb0 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< uint4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< uint4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< uint4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4.html new file mode 100644 index 00000000..f6e6b5da --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< uint4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< uint4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4-members.html new file mode 100644 index 00000000..1108074f --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< ulong4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< ulong4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< ulong4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4.html new file mode 100644 index 00000000..c1344383 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< ulong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< ulong4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4-members.html new file mode 100644 index 00000000..69d800e7 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< ulonglong2 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< ulonglong2 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< ulonglong2 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4.html new file mode 100644 index 00000000..fd582546 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< ulonglong2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< ulonglong2 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4-members.html new file mode 100644 index 00000000..71454d9c --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< ulonglong4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< ulonglong4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< ulonglong4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4.html new file mode 100644 index 00000000..b6572940 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< ulonglong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< ulonglong4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4-members.html new file mode 100644 index 00000000..c0747d0b --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< volatile value_t > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< volatile value_t >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.html new file mode 100644 index 00000000..9d36cbdb --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: nv_std::alignment_of< volatile value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< volatile value_t > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::alignment_of< volatile value_t >:
    +
    +
    + + +nv_std::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.png b/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.png new file mode 100644 index 00000000..3c0bfdeb Binary files /dev/null and b/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1bool__constant-members.html b/docs/generated-html/structnv__std_1_1bool__constant-members.html new file mode 100644 index 00000000..bc6447d4 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1bool__constant-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::bool_constant< V > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1bool__constant.html b/docs/generated-html/structnv__std_1_1bool__constant.html new file mode 100644 index 00000000..f25c9d8b --- /dev/null +++ b/docs/generated-html/structnv__std_1_1bool__constant.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::bool_constant< V > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::bool_constant< V > Struct Template Reference
    +
    +
    + +

    std::bool_constant +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::bool_constant< V >:
    +
    +
    + + +nv_std::integral_constant< bool, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< bool, V >
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< bool, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< bool, V >
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1bool__constant.png b/docs/generated-html/structnv__std_1_1bool__constant.png new file mode 100644 index 00000000..1596dfec Binary files /dev/null and b/docs/generated-html/structnv__std_1_1bool__constant.png differ diff --git a/docs/generated-html/structnv__std_1_1conditional-members.html b/docs/generated-html/structnv__std_1_1conditional-members.html new file mode 100644 index 00000000..a54081ba --- /dev/null +++ b/docs/generated-html/structnv__std_1_1conditional-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::conditional< B, T, F > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::conditional< B, T, F >, including all inherited members.

    + + +
    type typedefnv_std::conditional< B, T, F >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1conditional.html b/docs/generated-html/structnv__std_1_1conditional.html new file mode 100644 index 00000000..9d751817 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1conditional.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::conditional< B, T, F > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::conditional< B, T, F > Struct Template Reference
    +
    +
    + +

    std::conditional (true specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<bool B, class T, class F >
    + + + + +
    typedef T nv_std::conditional< B, T, F >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html b/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html new file mode 100644 index 00000000..3ed9624a --- /dev/null +++ b/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::conditional< false, T, F > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::conditional< false, T, F >, including all inherited members.

    + + +
    type typedefnv_std::conditional< false, T, F >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4.html b/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4.html new file mode 100644 index 00000000..7fbb1326 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::conditional< false, T, F > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::conditional< false, T, F > Struct Template Reference
    +
    +
    + +

    std::conditional (false specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef F type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<class T , class F >
    + + + + +
    typedef F nv_std::conditional< false, T, F >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1default__delete-members.html b/docs/generated-html/structnv__std_1_1default__delete-members.html new file mode 100644 index 00000000..7e892346 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1default__delete-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::default_delete< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::default_delete< T >, including all inherited members.

    + + +
    operator()(T *ptr) constnv_std::default_delete< T >inline
    + + + + diff --git a/docs/generated-html/structnv__std_1_1default__delete.html b/docs/generated-html/structnv__std_1_1default__delete.html new file mode 100644 index 00000000..084e2b7f --- /dev/null +++ b/docs/generated-html/structnv__std_1_1default__delete.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: nv_std::default_delete< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::default_delete< T > Struct Template Reference
    +
    +
    + +

    Default deleter. +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Member Functions

    void operator() (T *ptr) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + +
    void nv_std::default_delete< T >::operator() (T * ptr) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4-members.html b/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4-members.html new file mode 100644 index 00000000..7c038dde --- /dev/null +++ b/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::default_delete< T[]> Member List
    +
    +
    + +

    This is the complete list of members for nv_std::default_delete< T[]>, including all inherited members.

    + + +
    operator()(T *ptr) constnv_std::default_delete< T[]>inline
    + + + + diff --git a/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4.html b/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4.html new file mode 100644 index 00000000..c51c3518 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: nv_std::default_delete< T[]> Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::default_delete< T[]> Struct Template Reference
    +
    +
    + +

    Partial specialization for deleting array types. +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Member Functions

    void operator() (T *ptr) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + +
    void nv_std::default_delete< T[]>::operator() (T * ptr) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1enable__if-members.html b/docs/generated-html/structnv__std_1_1enable__if-members.html new file mode 100644 index 00000000..e5464c88 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1enable__if-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::enable_if< C, T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::enable_if< C, T >, including all inherited members.

    + + +
    type typedefnv_std::enable_if< C, T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1enable__if.html b/docs/generated-html/structnv__std_1_1enable__if.html new file mode 100644 index 00000000..ac720dd0 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1enable__if.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::enable_if< C, T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::enable_if< C, T > Struct Template Reference
    +
    +
    + +

    std::enable_if (true specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<bool C, typename T = void>
    + + + + +
    typedef T nv_std::enable_if< C, T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1enable__if_3_01false_00_01T_01_4.html b/docs/generated-html/structnv__std_1_1enable__if_3_01false_00_01T_01_4.html new file mode 100644 index 00000000..75fdd510 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1enable__if_3_01false_00_01T_01_4.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: nv_std::enable_if< false, T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::enable_if< false, T > Struct Template Reference
    +
    +
    + +

    std::enable_if (false specialization) +

    + +

    #include <nv_std.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1greater-members.html b/docs/generated-html/structnv__std_1_1greater-members.html new file mode 100644 index 00000000..b6fdba6d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1greater-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::greater< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::greater< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) constnv_std::greater< T >inline
    + + + + diff --git a/docs/generated-html/structnv__std_1_1greater.html b/docs/generated-html/structnv__std_1_1greater.html new file mode 100644 index 00000000..c684839d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1greater.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: nv_std::greater< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::greater< T > Struct Template Reference
    +
    +
    + +

    std::greater +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexpr bool operator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::greater< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1integral__constant-members.html b/docs/generated-html/structnv__std_1_1integral__constant-members.html new file mode 100644 index 00000000..2b5bc406 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1integral__constant-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::integral_constant< value_t, V > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1integral__constant.html b/docs/generated-html/structnv__std_1_1integral__constant.html new file mode 100644 index 00000000..777f3f19 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1integral__constant.html @@ -0,0 +1,261 @@ + + + + + + + +Cutlass: nv_std::integral_constant< value_t, V > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::integral_constant< value_t, V > Struct Template Reference
    +
    +
    + +

    std::integral_constant +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::integral_constant< value_t, V >:
    +
    +
    + + +nv_std::is_pointer_helper< remove_cv< T >::type > +nv_std::is_same< void, remove_cv< T >::type > +nv_std::is_integral< T > +nv_std::is_integral< char > +nv_std::is_integral< int > +nv_std::is_integral< long > +nv_std::is_integral< long long > +nv_std::is_integral< short > +nv_std::is_integral< signed char > +nv_std::is_integral< unsigned char > +nv_std::is_integral< unsigned int > +nv_std::is_integral< unsigned long > +nv_std::is_integral< unsigned long long > +nv_std::is_integral< unsigned short > +nv_std::is_pointer_helper< T > +nv_std::is_pointer_helper< T * > +nv_std::is_same< A, B > +nv_std::is_same< A, A > +nv_std::is_volatile< T > +nv_std::is_volatile< volatile T > + +
    + + + + + + +

    +Public Types

    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    + + + +

    +Static Public Attributes

    static const value_t value = V
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + +
    typedef integral_constant<value_t, V> nv_std::integral_constant< value_t, V >::type
    +
    + +
    +
    + +

    ◆ value_type

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + +
    typedef value_t nv_std::integral_constant< value_t, V >::value_type
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ operator value_type()

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE nv_std::integral_constant< value_t, V >::operator value_type () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE const value_type nv_std::integral_constant< value_t, V >::operator() () const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ value

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + +
    const value_t nv_std::integral_constant< value_t, V >::value = V
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1integral__constant.png b/docs/generated-html/structnv__std_1_1integral__constant.png new file mode 100644 index 00000000..ddd792d7 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1integral__constant.png differ diff --git a/docs/generated-html/structnv__std_1_1is__arithmetic-members.html b/docs/generated-html/structnv__std_1_1is__arithmetic-members.html new file mode 100644 index 00000000..06f493c8 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__arithmetic-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_arithmetic< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__arithmetic.html b/docs/generated-html/structnv__std_1_1is__arithmetic.html new file mode 100644 index 00000000..7d591ca7 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__arithmetic.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_arithmetic< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_arithmetic< T > Struct Template Reference
    +
    +
    + +

    std::is_arithmetic +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_arithmetic< T >:
    +
    +
    + + +nv_std::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__arithmetic.png b/docs/generated-html/structnv__std_1_1is__arithmetic.png new file mode 100644 index 00000000..34a184c1 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__arithmetic.png differ diff --git a/docs/generated-html/structnv__std_1_1is__base__of-members.html b/docs/generated-html/structnv__std_1_1is__base__of-members.html new file mode 100644 index 00000000..599a2929 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__base__of-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_base_of< BaseT, DerivedT > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__base__of.html b/docs/generated-html/structnv__std_1_1is__base__of.html new file mode 100644 index 00000000..9f39064c --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__base__of.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_base_of< BaseT, DerivedT > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_base_of< BaseT, DerivedT > Struct Template Reference
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__base__of.png b/docs/generated-html/structnv__std_1_1is__base__of.png new file mode 100644 index 00000000..269edcce Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__base__of.png differ diff --git a/docs/generated-html/structnv__std_1_1is__base__of__helper-members.html b/docs/generated-html/structnv__std_1_1is__base__of__helper-members.html new file mode 100644 index 00000000..fb28f514 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__base__of__helper-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_base_of_helper< BaseT, DerivedT > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__base__of__helper.html b/docs/generated-html/structnv__std_1_1is__base__of__helper.html new file mode 100644 index 00000000..e2675e2d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__base__of__helper.html @@ -0,0 +1,264 @@ + + + + + + + +Cutlass: nv_std::is_base_of_helper< BaseT, DerivedT > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_base_of_helper< BaseT, DerivedT > Struct Template Reference
    +
    +
    + +

    Helper for std::is_base_of. +

    + +

    #include <nv_std.h>

    + + + + +

    +Classes

    struct  dummy
     
    + + + + + +

    +Public Types

    typedef char(& yes)[1]
     
    typedef char(& no)[2]
     
    + + + + + + +

    +Static Public Member Functions

    template<typename T >
    static CUTLASS_HOST_DEVICE yes check (DerivedT *, T)
     
    static CUTLASS_HOST_DEVICE no check (BaseT *, int)
     
    + + + +

    +Static Public Attributes

    static const bool value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes)
     
    +

    Member Typedef Documentation

    + +

    ◆ no

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + +
    typedef char(& nv_std::is_base_of_helper< BaseT, DerivedT >::no)[2]
    +
    + +
    +
    + +

    ◆ yes

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + +
    typedef char(& nv_std::is_base_of_helper< BaseT, DerivedT >::yes)[1]
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ check() [1/2]

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    static CUTLASS_HOST_DEVICE yes nv_std::is_base_of_helper< BaseT, DerivedT >::check (DerivedT * ,
     
    )
    +
    +static
    +
    + +
    +
    + +

    ◆ check() [2/2]

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    static CUTLASS_HOST_DEVICE no nv_std::is_base_of_helper< BaseT, DerivedT >::check (BaseT * ,
    int  
    )
    +
    +static
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ value

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + + +
    + + + + +
    const bool nv_std::is_base_of_helper< BaseT, DerivedT >::value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy-members.html b/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy-members.html new file mode 100644 index 00000000..29895815 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy.html b/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy.html new file mode 100644 index 00000000..5d927f04 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy.html @@ -0,0 +1,146 @@ + + + + + + + +Cutlass: nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE operator B* () const
     
    CUTLASS_HOST_DEVICE operator D* ()
     
    +

    Member Function Documentation

    + +

    ◆ operator B*()

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename B , typename D >
    + + + + + + + +
    CUTLASS_HOST_DEVICE nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >::operator B* () const
    +
    + +
    +
    + +

    ◆ operator D*()

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename B , typename D >
    + + + + + + + +
    CUTLASS_HOST_DEVICE nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >::operator D* ()
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__floating__point-members.html b/docs/generated-html/structnv__std_1_1is__floating__point-members.html new file mode 100644 index 00000000..4897d96f --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__floating__point-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_floating_point< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__floating__point.html b/docs/generated-html/structnv__std_1_1is__floating__point.html new file mode 100644 index 00000000..0d4eacf0 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__floating__point.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_floating_point< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_floating_point< T > Struct Template Reference
    +
    +
    + +

    std::is_floating_point +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_floating_point< T >:
    +
    +
    + + +nv_std::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__floating__point.png b/docs/generated-html/structnv__std_1_1is__floating__point.png new file mode 100644 index 00000000..b8e015f6 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__floating__point.png differ diff --git a/docs/generated-html/structnv__std_1_1is__fundamental-members.html b/docs/generated-html/structnv__std_1_1is__fundamental-members.html new file mode 100644 index 00000000..df714c51 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__fundamental-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_fundamental< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__fundamental.html b/docs/generated-html/structnv__std_1_1is__fundamental.html new file mode 100644 index 00000000..943eed37 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__fundamental.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_fundamental< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_fundamental< T > Struct Template Reference
    +
    +
    + +

    std::is_fundamental +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_fundamental< T >:
    +
    +
    + + +nv_std::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__fundamental.png b/docs/generated-html/structnv__std_1_1is__fundamental.png new file mode 100644 index 00000000..1571b366 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__fundamental.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral-members.html b/docs/generated-html/structnv__std_1_1is__integral-members.html new file mode 100644 index 00000000..634ff100 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral.html b/docs/generated-html/structnv__std_1_1is__integral.html new file mode 100644 index 00000000..ab22727f --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: nv_std::is_integral< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< T > Struct Template Reference
    +
    +
    + +

    std::is_integral +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< T >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > +nv_std::is_integral< const T > +nv_std::is_integral< const volatile T > +nv_std::is_integral< volatile T > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral.png b/docs/generated-html/structnv__std_1_1is__integral.png new file mode 100644 index 00000000..103543a5 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4-members.html new file mode 100644 index 00000000..4d478293 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.html new file mode 100644 index 00000000..fbe4e279 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< char > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< char >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.png new file mode 100644 index 00000000..dd7231fb Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4-members.html new file mode 100644 index 00000000..6005eff6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< const T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.html new file mode 100644 index 00000000..83e32731 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::is_integral< const T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< const T > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< const T >:
    +
    +
    + + +nv_std::is_integral< T > +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.png new file mode 100644 index 00000000..fa5f4449 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4-members.html new file mode 100644 index 00000000..4fe1de69 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< const volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.html new file mode 100644 index 00000000..cea9430b --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::is_integral< const volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< const volatile T > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< const volatile T >:
    +
    +
    + + +nv_std::is_integral< T > +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.png new file mode 100644 index 00000000..10bfdda5 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4-members.html new file mode 100644 index 00000000..976aa626 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< int > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.html new file mode 100644 index 00000000..8035a7e0 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< int > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< int >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.png new file mode 100644 index 00000000..a3aa3bbc Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4-members.html new file mode 100644 index 00000000..3ac3f002 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.html new file mode 100644 index 00000000..393aff41 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< long > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< long >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.png new file mode 100644 index 00000000..34a3098a Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4-members.html new file mode 100644 index 00000000..ac258768 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< long long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.html new file mode 100644 index 00000000..58992abd --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< long long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< long long > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< long long >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.png new file mode 100644 index 00000000..9fde52ca Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4-members.html new file mode 100644 index 00000000..44905047 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< short > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.html new file mode 100644 index 00000000..1879b40c --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< short > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< short > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< short >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.png new file mode 100644 index 00000000..204a67b0 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4-members.html new file mode 100644 index 00000000..50ae470f --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< signed char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.html new file mode 100644 index 00000000..a376fd7e --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< signed char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< signed char > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< signed char >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.png new file mode 100644 index 00000000..655781c8 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4-members.html new file mode 100644 index 00000000..7b46c2ff --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< unsigned char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.html new file mode 100644 index 00000000..9c269741 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< unsigned char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< unsigned char > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< unsigned char >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.png new file mode 100644 index 00000000..f2abe732 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4-members.html new file mode 100644 index 00000000..7490e1c6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< unsigned int > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.html new file mode 100644 index 00000000..3bd2d119 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< unsigned int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< unsigned int > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< unsigned int >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.png new file mode 100644 index 00000000..592a8006 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4-members.html new file mode 100644 index 00000000..dffcd7d1 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< unsigned long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.html new file mode 100644 index 00000000..ea0a7215 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< unsigned long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< unsigned long > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< unsigned long >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.png new file mode 100644 index 00000000..b28e466d Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4-members.html new file mode 100644 index 00000000..95b2eee5 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< unsigned long long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.html new file mode 100644 index 00000000..bd4b65a1 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< unsigned long long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< unsigned long long > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< unsigned long long >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.png new file mode 100644 index 00000000..edf19e69 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4-members.html new file mode 100644 index 00000000..42071ca6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< unsigned short > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.html new file mode 100644 index 00000000..d9550a3a --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< unsigned short > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< unsigned short > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< unsigned short >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.png new file mode 100644 index 00000000..109c6d71 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4-members.html new file mode 100644 index 00000000..e1b735b5 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.html new file mode 100644 index 00000000..3546a2d6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::is_integral< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< volatile T > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< volatile T >:
    +
    +
    + + +nv_std::is_integral< T > +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.png new file mode 100644 index 00000000..7709eeaf Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__pointer-members.html b/docs/generated-html/structnv__std_1_1is__pointer-members.html new file mode 100644 index 00000000..425e172d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__pointer-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_pointer< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer.html b/docs/generated-html/structnv__std_1_1is__pointer.html new file mode 100644 index 00000000..e51ffc1b --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__pointer.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: nv_std::is_pointer< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_pointer< T > Struct Template Reference
    +
    +
    + +

    std::is_pointer +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_pointer< T >:
    +
    +
    + + +nv_std::is_pointer_helper< remove_cv< T >::type > +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer.png b/docs/generated-html/structnv__std_1_1is__pointer.png new file mode 100644 index 00000000..930d7ca5 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__pointer.png differ diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper-members.html b/docs/generated-html/structnv__std_1_1is__pointer__helper-members.html new file mode 100644 index 00000000..b1d6a93d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__pointer__helper-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_pointer_helper< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper.html b/docs/generated-html/structnv__std_1_1is__pointer__helper.html new file mode 100644 index 00000000..cac3fa60 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__pointer__helper.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_pointer_helper< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_pointer_helper< T > Struct Template Reference
    +
    +
    + +

    Helper for std::is_pointer (false specialization) +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_pointer_helper< T >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper.png b/docs/generated-html/structnv__std_1_1is__pointer__helper.png new file mode 100644 index 00000000..ac03b267 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__pointer__helper.png differ diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4-members.html b/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4-members.html new file mode 100644 index 00000000..26982361 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_pointer_helper< T * > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.html b/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.html new file mode 100644 index 00000000..0d3db722 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_pointer_helper< T * > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_pointer_helper< T * > Struct Template Reference
    +
    +
    + +

    Helper for std::is_pointer (true specialization) +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_pointer_helper< T * >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.png b/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.png new file mode 100644 index 00000000..200380c6 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__same-members.html b/docs/generated-html/structnv__std_1_1is__same-members.html new file mode 100644 index 00000000..679cd52d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__same-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_same< A, B > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__same.html b/docs/generated-html/structnv__std_1_1is__same.html new file mode 100644 index 00000000..ebad17d2 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__same.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_same< A, B > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_same< A, B > Struct Template Reference
    +
    +
    + +

    std::is_same (false specialization) +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_same< A, B >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__same.png b/docs/generated-html/structnv__std_1_1is__same.png new file mode 100644 index 00000000..1417d349 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__same.png differ diff --git a/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4-members.html b/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4-members.html new file mode 100644 index 00000000..0ac28c8d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_same< A, A > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.html b/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.html new file mode 100644 index 00000000..7a5fc109 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_same< A, A > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_same< A, A > Struct Template Reference
    +
    +
    + +

    std::is_same (true specialization) +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_same< A, A >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.png b/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.png new file mode 100644 index 00000000..0d9a64a1 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1is__trivially__copyable-members.html b/docs/generated-html/structnv__std_1_1is__trivially__copyable-members.html new file mode 100644 index 00000000..4fabae90 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__trivially__copyable-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_trivially_copyable< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__trivially__copyable.html b/docs/generated-html/structnv__std_1_1is__trivially__copyable.html new file mode 100644 index 00000000..d8af9cc3 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__trivially__copyable.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: nv_std::is_trivially_copyable< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_trivially_copyable< T > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_trivially_copyable< T >:
    +
    +
    + + +nv_std::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    static const bool value
     
    +

    Detailed Description

    +

    template<typename T>
    +struct nv_std::is_trivially_copyable< T >

    + +

    std::is_trivially_copyable

    +

    This implementation only evaluates to true if T is a fundamental or pointer type.

    +

    Without help from partial template specializations provided by the user for a specific class or struct, this trait will never report that the specified class or struct is trivially-copyable; this is always safe, if possibly sub-optimal.

    +

    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__trivially__copyable.png b/docs/generated-html/structnv__std_1_1is__trivially__copyable.png new file mode 100644 index 00000000..464a678d Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__trivially__copyable.png differ diff --git a/docs/generated-html/structnv__std_1_1is__void-members.html b/docs/generated-html/structnv__std_1_1is__void-members.html new file mode 100644 index 00000000..0ed9c5d5 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__void-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_void< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__void.html b/docs/generated-html/structnv__std_1_1is__void.html new file mode 100644 index 00000000..908f72f0 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__void.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: nv_std::is_void< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_void< T > Struct Template Reference
    +
    +
    + +

    std::is_void +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_void< T >:
    +
    +
    + + +nv_std::is_same< void, remove_cv< T >::type > +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__void.png b/docs/generated-html/structnv__std_1_1is__void.png new file mode 100644 index 00000000..91bc2f5c Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__void.png differ diff --git a/docs/generated-html/structnv__std_1_1is__volatile-members.html b/docs/generated-html/structnv__std_1_1is__volatile-members.html new file mode 100644 index 00000000..fc433c5a --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__volatile-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_volatile< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__volatile.html b/docs/generated-html/structnv__std_1_1is__volatile.html new file mode 100644 index 00000000..a34d6b70 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__volatile.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_volatile< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_volatile< T > Struct Template Reference
    +
    +
    + +

    std::is_volatile +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_volatile< T >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__volatile.png b/docs/generated-html/structnv__std_1_1is__volatile.png new file mode 100644 index 00000000..e1075605 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__volatile.png differ diff --git a/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4-members.html b/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4-members.html new file mode 100644 index 00000000..ca023b46 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_volatile< volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.html b/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.html new file mode 100644 index 00000000..1cec586a --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_volatile< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_volatile< volatile T > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_volatile< volatile T >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.png b/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.png new file mode 100644 index 00000000..ee469288 Binary files /dev/null and b/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.png differ diff --git a/docs/generated-html/structnv__std_1_1less-members.html b/docs/generated-html/structnv__std_1_1less-members.html new file mode 100644 index 00000000..e09e31c6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1less-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::less< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::less< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) const | nv_std::less< T > | inline
    + + + + diff --git a/docs/generated-html/structnv__std_1_1less.html b/docs/generated-html/structnv__std_1_1less.html new file mode 100644 index 00000000..83b22cdd --- /dev/null +++ b/docs/generated-html/structnv__std_1_1less.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: nv_std::less< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::less< T > Struct Template Reference
    +
    +
    + +

    std::less +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexpr bool operator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::less< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1nullptr__t.html b/docs/generated-html/structnv__std_1_1nullptr__t.html new file mode 100644 index 00000000..09279c72 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1nullptr__t.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: nv_std::nullptr_t Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::nullptr_t Struct Reference
    +
    +
    + +

    std::nullptr_t +

    + +

    #include <nv_std.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1plus-members.html b/docs/generated-html/structnv__std_1_1plus-members.html new file mode 100644 index 00000000..4470e45d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1plus-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::plus< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::plus< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) const | nv_std::plus< T > | inline
    + + + + diff --git a/docs/generated-html/structnv__std_1_1plus.html b/docs/generated-html/structnv__std_1_1plus.html new file mode 100644 index 00000000..929e75d6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1plus.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: nv_std::plus< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::plus< T > Struct Template Reference
    +
    +
    + +

    nv_std::plus +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexpr T operator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr T nv_std::plus< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__const-members.html b/docs/generated-html/structnv__std_1_1remove__const-members.html new file mode 100644 index 00000000..b9647b8b --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__const-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::remove_const< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::remove_const< T >, including all inherited members.

    + + +
    type typedef | nv_std::remove_const< T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__const.html b/docs/generated-html/structnv__std_1_1remove__const.html new file mode 100644 index 00000000..b371ed23 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__const.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::remove_const< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::remove_const< T > Struct Template Reference
    +
    +
    + +

    std::remove_const (non-const specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T>
    + + + + +
    typedef T nv_std::remove_const< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4-members.html b/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4-members.html new file mode 100644 index 00000000..4bee0ceb --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::remove_const< const T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::remove_const< const T >, including all inherited members.

    + + +
    type typedef | nv_std::remove_const< const T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4.html b/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4.html new file mode 100644 index 00000000..b33f95d4 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::remove_const< const T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::remove_const< const T > Struct Template Reference
    +
    +
    + +

    std::remove_const (const specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef T nv_std::remove_const< const T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__cv-members.html b/docs/generated-html/structnv__std_1_1remove__cv-members.html new file mode 100644 index 00000000..740ac298 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__cv-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::remove_cv< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::remove_cv< T >, including all inherited members.

    + + +
    type typedef | nv_std::remove_cv< T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__cv.html b/docs/generated-html/structnv__std_1_1remove__cv.html new file mode 100644 index 00000000..58a19ab9 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__cv.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::remove_cv< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::remove_cv< T > Struct Template Reference
    +
    +
    + +

    std::remove_cv +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef remove_volatile< typename remove_const< T >::type >::type type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef remove_volatile<typename remove_const<T>::type>::type nv_std::remove_cv< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__volatile-members.html b/docs/generated-html/structnv__std_1_1remove__volatile-members.html new file mode 100644 index 00000000..9bb367d5 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__volatile-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::remove_volatile< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::remove_volatile< T >, including all inherited members.

    + + +
    type typedef | nv_std::remove_volatile< T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__volatile.html b/docs/generated-html/structnv__std_1_1remove__volatile.html new file mode 100644 index 00000000..a77e3a17 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__volatile.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::remove_volatile< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::remove_volatile< T > Struct Template Reference
    +
    +
    + +

    std::remove_volatile (non-volatile specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T>
    + + + + +
    typedef T nv_std::remove_volatile< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4-members.html b/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4-members.html new file mode 100644 index 00000000..1d84101a --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::remove_volatile< volatile T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::remove_volatile< volatile T >, including all inherited members.

    + + +
    type typedefnv_std::remove_volatile< volatile T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4.html b/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4.html new file mode 100644 index 00000000..1b8cc6b0 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::remove_volatile< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::remove_volatile< volatile T > Struct Template Reference
    +
    +
    + +

    std::remove_volatile (volatile specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef T nv_std::remove_volatile< volatile T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/sync_off.png b/docs/generated-html/sync_off.png new file mode 100644 index 00000000..205d0717 Binary files /dev/null and b/docs/generated-html/sync_off.png differ diff --git a/docs/generated-html/sync_on.png b/docs/generated-html/sync_on.png new file mode 100644 index 00000000..e82391b3 Binary files /dev/null and b/docs/generated-html/sync_on.png differ diff --git a/docs/generated-html/tab_a.png b/docs/generated-html/tab_a.png new file mode 100644 index 00000000..f771cc2b Binary files /dev/null and b/docs/generated-html/tab_a.png differ diff --git a/docs/generated-html/tab_b.png b/docs/generated-html/tab_b.png new file mode 100644 index 00000000..6c2f358e Binary files /dev/null and b/docs/generated-html/tab_b.png differ diff --git a/docs/generated-html/tab_h.png b/docs/generated-html/tab_h.png new file mode 100644 index 00000000..e883d956 Binary files /dev/null and b/docs/generated-html/tab_h.png differ diff --git a/docs/generated-html/tab_s.png b/docs/generated-html/tab_s.png new file mode 100644 index 00000000..f62088b4 Binary files /dev/null and b/docs/generated-html/tab_s.png differ diff --git a/docs/generated-html/tabs.css b/docs/generated-html/tabs.css new file mode 100644 index 00000000..a28614b8 --- /dev/null +++ b/docs/generated-html/tabs.css @@ -0,0 +1 @@ +.sm{position:relative;z-index:9999}.sm,.sm ul,.sm li{display:block;list-style:none;margin:0;padding:0;line-height:normal;direction:ltr;text-align:left;-webkit-tap-highlight-color:rgba(0,0,0,0)}.sm-rtl,.sm-rtl ul,.sm-rtl li{direction:rtl;text-align:right}.sm>li>h1,.sm>li>h2,.sm>li>h3,.sm>li>h4,.sm>li>h5,.sm>li>h6{margin:0;padding:0}.sm ul{display:none}.sm li,.sm a{position:relative}.sm a{display:block}.sm a.disabled{cursor:not-allowed}.sm:after{content:"\00a0";display:block;height:0;font:0/0 serif;clear:both;visibility:hidden;overflow:hidden}.sm,.sm *,.sm *:before,.sm 
*:after{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box}#doc-content{overflow:auto;display:block;padding:0;margin:0;-webkit-overflow-scrolling:touch}.sm-dox{background-image:url("tab_b.png")}.sm-dox a,.sm-dox a:focus,.sm-dox a:hover,.sm-dox a:active{padding:0 12px;padding-right:43px;font-family:"Lucida Grande","Geneva","Helvetica",Arial,sans-serif;font-size:13px;font-weight:bold;line-height:36px;text-decoration:none;text-shadow:0 1px 1px rgba(255,255,255,0.9);color:#283a5d;outline:0}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a.current{color:#d23600}.sm-dox a.disabled{color:#bbb}.sm-dox a span.sub-arrow{position:absolute;top:50%;margin-top:-14px;left:auto;right:3px;width:28px;height:28px;overflow:hidden;font:bold 12px/28px monospace!important;text-align:center;text-shadow:none;background:rgba(255,255,255,0.5);-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox a.highlighted span.sub-arrow:before{display:block;content:'-'}.sm-dox>li:first-child>a,.sm-dox>li:first-child>:not(ul) a{-moz-border-radius:5px 5px 0 0;-webkit-border-radius:5px;border-radius:5px 5px 0 0}.sm-dox>li:last-child>a,.sm-dox>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) 
a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul{-moz-border-radius:0 0 5px 5px;-webkit-border-radius:0;border-radius:0 0 5px 5px}.sm-dox>li:last-child>a.highlighted,.sm-dox>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted{-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox ul{background:rgba(162,162,162,0.1)}.sm-dox ul a,.sm-dox ul a:focus,.sm-dox ul a:hover,.sm-dox ul a:active{font-size:12px;border-left:8px solid transparent;line-height:36px;text-shadow:none;background-color:white;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul ul a,.sm-dox ul ul a:hover,.sm-dox ul ul a:focus,.sm-dox ul ul a:active{border-left:16px solid transparent}.sm-dox ul ul ul a,.sm-dox ul ul ul a:hover,.sm-dox ul ul ul a:focus,.sm-dox ul ul ul a:active{border-left:24px solid transparent}.sm-dox ul ul ul ul a,.sm-dox ul ul ul ul a:hover,.sm-dox ul ul ul ul a:focus,.sm-dox ul ul ul ul a:active{border-left:32px solid 
transparent}.sm-dox ul ul ul ul ul a,.sm-dox ul ul ul ul ul a:hover,.sm-dox ul ul ul ul ul a:focus,.sm-dox ul ul ul ul ul a:active{border-left:40px solid transparent}@media(min-width:768px){.sm-dox ul{position:absolute;width:12em}.sm-dox li{float:left}.sm-dox.sm-rtl li{float:right}.sm-dox ul li,.sm-dox.sm-rtl ul li,.sm-dox.sm-vertical li{float:none}.sm-dox a{white-space:nowrap}.sm-dox ul a,.sm-dox.sm-vertical a{white-space:normal}.sm-dox .sm-nowrap>li>a,.sm-dox .sm-nowrap>li>:not(ul) a{white-space:nowrap}.sm-dox{padding:0 10px;background-image:url("tab_b.png");line-height:36px}.sm-dox a span.sub-arrow{top:50%;margin-top:-2px;right:12px;width:0;height:0;border-width:4px;border-style:solid dashed dashed dashed;border-color:#283a5d transparent transparent transparent;background:transparent;-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox a,.sm-dox a:focus,.sm-dox a:active,.sm-dox a:hover,.sm-dox a.highlighted{padding:0 12px;background-image:url("tab_s.png");background-repeat:no-repeat;background-position:right;-moz-border-radius:0!important;-webkit-border-radius:0;border-radius:0!important}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a:hover span.sub-arrow{border-color:white transparent transparent transparent}.sm-dox a.has-submenu{padding-right:24px}.sm-dox li{border-top:0}.sm-dox>li>ul:before,.sm-dox>li>ul:after{content:'';position:absolute;top:-18px;left:30px;width:0;height:0;overflow:hidden;border-width:9px;border-style:dashed dashed solid dashed;border-color:transparent transparent #bbb transparent}.sm-dox>li>ul:after{top:-16px;left:31px;border-width:8px;border-color:transparent transparent #fff transparent}.sm-dox ul{border:1px solid #bbb;padding:5px 0;background:#fff;-moz-border-radius:5px!important;-webkit-border-radius:5px;border-radius:5px!important;-moz-box-shadow:0 5px 9px rgba(0,0,0,0.2);-webkit-box-shadow:0 5px 9px rgba(0,0,0,0.2);box-shadow:0 5px 9px 
rgba(0,0,0,0.2)}.sm-dox ul a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-color:transparent transparent transparent #555;border-style:dashed dashed dashed solid}.sm-dox ul a,.sm-dox ul a:hover,.sm-dox ul a:focus,.sm-dox ul a:active,.sm-dox ul a.highlighted{color:#555;background-image:none;border:0!important;color:#555;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul a:hover span.sub-arrow{border-color:transparent transparent transparent white}.sm-dox span.scroll-up,.sm-dox span.scroll-down{position:absolute;display:none;visibility:hidden;overflow:hidden;background:#fff;height:36px}.sm-dox span.scroll-up:hover,.sm-dox span.scroll-down:hover{background:#eee}.sm-dox span.scroll-up:hover span.scroll-up-arrow,.sm-dox span.scroll-up:hover span.scroll-down-arrow{border-color:transparent transparent #d23600 transparent}.sm-dox span.scroll-down:hover span.scroll-down-arrow{border-color:#d23600 transparent transparent transparent}.sm-dox span.scroll-up-arrow,.sm-dox span.scroll-down-arrow{position:absolute;top:0;left:50%;margin-left:-6px;width:0;height:0;overflow:hidden;border-width:6px;border-style:dashed dashed solid dashed;border-color:transparent transparent #555 transparent}.sm-dox span.scroll-down-arrow{top:8px;border-style:solid dashed dashed dashed;border-color:#555 transparent transparent transparent}.sm-dox.sm-rtl a.has-submenu{padding-right:12px;padding-left:24px}.sm-dox.sm-rtl a span.sub-arrow{right:auto;left:12px}.sm-dox.sm-rtl.sm-vertical a.has-submenu{padding:10px 20px}.sm-dox.sm-rtl.sm-vertical a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-rtl>li>ul:before{left:auto;right:30px}.sm-dox.sm-rtl>li>ul:after{left:auto;right:31px}.sm-dox.sm-rtl ul a.has-submenu{padding:10px 20px!important}.sm-dox.sm-rtl ul a 
span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-vertical{padding:10px 0;-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox.sm-vertical a{padding:10px 20px}.sm-dox.sm-vertical a:hover,.sm-dox.sm-vertical a:focus,.sm-dox.sm-vertical a:active,.sm-dox.sm-vertical a.highlighted{background:#fff}.sm-dox.sm-vertical a.disabled{background-image:url("tab_b.png")}.sm-dox.sm-vertical a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-style:dashed dashed dashed solid;border-color:transparent transparent transparent #555}.sm-dox.sm-vertical>li>ul:before,.sm-dox.sm-vertical>li>ul:after{display:none}.sm-dox.sm-vertical ul a{padding:10px 20px}.sm-dox.sm-vertical ul a:hover,.sm-dox.sm-vertical ul a:focus,.sm-dox.sm-vertical ul a:active,.sm-dox.sm-vertical ul a.highlighted{background:#eee}.sm-dox.sm-vertical ul a.disabled{background:#fff}} \ No newline at end of file diff --git a/docs/generated-html/tensor__ref_8h.html b/docs/generated-html/tensor__ref_8h.html new file mode 100644 index 00000000..14314c66 --- /dev/null +++ b/docs/generated-html/tensor__ref_8h.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: tensor_ref.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    tensor_ref.h File Reference
    +
    +
    + +

    Defines a structure containing strides and a pointer to tensor data. +More...

    +
    #include <typeinfo>
    +#include <cutlass/coord.h>
    +#include <cutlass/cutlass.h>
    +#include <cutlass/vector.h>
    +
    +

    Go to the source code of this file.

    + + + + + +

    +Classes

    class  cutlass::TensorRef< Storage_, Rank_ >
     Structure modeling a pointer and stride into a tensor. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/tensor__ref_8h_source.html b/docs/generated-html/tensor__ref_8h_source.html new file mode 100644 index 00000000..8031da7d --- /dev/null +++ b/docs/generated-html/tensor__ref_8h_source.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: tensor_ref.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    tensor_ref.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <typeinfo>
    31 
    32 #include <cutlass/coord.h>
    33 #include <cutlass/cutlass.h>
    34 #include <cutlass/vector.h>
    35 
    36 namespace cutlass {
    37 
    39 
    41 template <typename Storage_, int Rank_>
    42 class TensorRef {
    43  public:
    45  typedef Storage_ Storage;
    46 
    48  static int const Rank = Rank_;
    49 
    50  private:
    51  //
    52  // Data members
    53  //
    54 
    56  Storage* ptr_;
    57 
    59  Coord<Rank> stride_;
    60 
    61  public:
    62  //
    63  // Methods
    64  //
    65 
    68  TensorRef() : ptr_(nullptr) {}
    69 
    72  TensorRef(Storage* ptr, Coord<Rank> stride) : ptr_(ptr), stride_(stride) {}
    73 
    76  void reset(Storage* ptr = nullptr, Coord<Rank> stride = Coord<Rank>(0)) {
    77  ptr_ = ptr;
    78  stride_ = stride;
    79  }
    80 
    82  template <typename T>
    84  Coord<Rank> converted_stride;
    85  for (int i = 0; i < Rank - 1; ++i) {
    86  converted_stride[i] = stride_[i] * Extent<Storage>::kValue / Extent<T>::kValue;
    87  }
    88  converted_stride[Rank - 1] = stride_[Rank - 1];
    89 
    90  return TensorRef<T, Rank>(reinterpret_cast<T*>(ptr_), converted_stride);
    91  }
    92 
    95  bool good() const { return ptr_ != nullptr; }
    96 
    99  Storage* data() const { return ptr_; }
    100 
    103  Coord<Rank> const& stride() const { return stride_; }
    104 
    107  int const& stride(int dim) const { return stride_.at(dim); }
    108 
    111  int leading_dim() const { return __NV_STD_MAX(stride_[1], stride_[2]); }
    112 
    115  long long offset(Coord<Rank> const& coord) const {
    116  return stride_.template dot<long long>(coord);
    117  }
    118 
    121  Storage& at(Coord<Rank> const& coord) const { return ptr_[offset(coord)]; }
    122 
    124  Storage& operator[](Coord<Rank> const& coord) const { return at(coord); }
    125 
    128  Storage& at(int idx) const { return ptr_[idx]; }
    129 
    131  Storage& operator[](int idx) const { return at(idx); }
    132 
    136  ptr_ += offset(b);
    137  return *this;
    138  }
    139 
    142  TensorRef operator+(Coord<Rank> const& b) const { return TensorRef(ptr_ + offset(b), stride_); }
    143 
    146  TensorRef operator-(Coord<Rank> const& b) const { return TensorRef(ptr_ - offset(b), stride_); }
    147 };
    148 
    150 
    151 } // namespace cutlass
    CUTLASS_HOST_DEVICE int const & stride(int dim) const
    Returns the stride of the tensor in the given dimension.
    Definition: tensor_ref.h:107
    +
    Storage & operator[](int idx) const
    Element-wise accessor.
    Definition: tensor_ref.h:131
    +
    Definition: convert.h:33
    +
    CUTLASS_HOST_DEVICE Storage & at(Coord< Rank > const &coord) const
    Returns a reference to the element at a given Coord.
    Definition: tensor_ref.h:121
    +
    CUTLASS_HOST_DEVICE TensorRef & advance(Coord< Rank > const &b)
    Adds an offset to the pointer.
    Definition: tensor_ref.h:135
    +
    static int const Rank
    Rank of tensor.
    Definition: tensor_ref.h:48
    +
    CUTLASS_HOST_DEVICE TensorRef operator+(Coord< Rank > const &b) const
    Returns a TensorRef offset by a given amount.
    Definition: tensor_ref.h:142
    +
    A Coord is a coordinate of arbitrary rank into a tensor or matrix.
    +
    Storage_ Storage
    Data type of individual access.
    Definition: tensor_ref.h:45
    +
    CUTLASS_HOST_DEVICE TensorRef operator-(Coord< Rank > const &b) const
    Returns a TensorRef offset by a given amount.
    Definition: tensor_ref.h:146
    +
    #define __NV_STD_MAX(a, b)
    Select maximum(a, b)
    Definition: platform.h:155
    +
    CUTLASS_HOST_DEVICE int leading_dim() const
    Returns the maximum stride element as the 'leading dimension'.
    Definition: tensor_ref.h:111
    +
    CUTLASS_HOST_DEVICE Storage * data() const
    Returns the pointer to referenced data.
    Definition: tensor_ref.h:99
    +
    CUTLASS_HOST_DEVICE TensorRef(Storage *ptr, Coord< Rank > stride)
    Constructs from a pointer and stride.
    Definition: tensor_ref.h:72
    +
    Storage & operator[](Coord< Rank > const &coord) const
    Element-wise accessor.
    Definition: tensor_ref.h:124
    +
    #define nullptr
    nullptr
    Definition: platform.h:136
    +
    CUTLASS_HOST_DEVICE long long offset(Coord< Rank > const &coord) const
    Computes the offset of an index from the origin of the tensor.
    Definition: tensor_ref.h:115
    +
    Structure modeling a pointer and stride into a tensor.
    Definition: tensor_ref.h:42
    +
    TensorRef< T, Rank > convert()
    Conversion function.
    Definition: tensor_ref.h:83
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    CUTLASS_HOST_DEVICE bool good() const
    Returns true if the TensorRef may be safely accessed.
    Definition: tensor_ref.h:95
    + +
    Defines a 1D vector of elements held in the registers of each thread.
    +
    CUTLASS_HOST_DEVICE void reset(Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))
    Updates the pointer and stride of a TensorRef.
    Definition: tensor_ref.h:76
    +
    CUTLASS_HOST_DEVICE int & at()
    Gets the index of a given Coord element.
    Definition: coord.h:185
    +
    CUTLASS_HOST_DEVICE Coord< Rank > const & stride() const
    Returns the stride of the tensor.
    Definition: tensor_ref.h:103
    +
    Basic include for CUTLASS macros.
    +
    CUTLASS_HOST_DEVICE Storage & at(int idx) const
    Returns a reference to the element at a given linear index.
    Definition: tensor_ref.h:128
    +
    CUTLASS_HOST_DEVICE TensorRef()
    Default ctor.
    Definition: tensor_ref.h:68
    +
    Returns the extent of a scalar or vector.
    Definition: vector.h:161
    +
    + + + + diff --git a/docs/generated-html/tensor__view_8h.html b/docs/generated-html/tensor__view_8h.html new file mode 100644 index 00000000..7fa3cfcf --- /dev/null +++ b/docs/generated-html/tensor__view_8h.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: tensor_view.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    tensor_view.h File Reference
    +
    +
    + +

    Defines a structure containing strides, bounds, and a pointer to tensor data. +More...

    +
    #include <cmath>
    +#include <cutlass/cutlass.h>
    +#include <cutlass/tensor_ref.h>
    +
    +

    Go to the source code of this file.

    + + + + + +

    +Classes

    class  cutlass::TensorView< T >
     Host-side reference implementation of tensor operations. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/tensor__view_8h_source.html b/docs/generated-html/tensor__view_8h_source.html new file mode 100644 index 00000000..65585402 --- /dev/null +++ b/docs/generated-html/tensor__view_8h_source.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: tensor_view.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    tensor_view.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cmath>
    32 
    33 #include <cutlass/cutlass.h>
    34 #include <cutlass/tensor_ref.h>
    35 
    36 namespace cutlass {
    37 
    39 
    41 template <typename T>
    42 class TensorView : public TensorRef<T, 4> {
    43  public:
    46 
    48  typedef Base TensorRef_t;
    49 
    52 
    54  static int const Rank = TensorRef_t::Rank;
    55 
    57  typedef int Offset_t;
    58 
    61 
    62  private:
    63  //
    64  // Data members
    65  //
    66 
    68  TensorRef_t ref_;
    69 
    71  Coord_t size_;
    72 
    73  public:
    74  //
    75  // Device and Host Methods
    76  //
    77 
    81 
    84  TensorView(TensorRef_t const& _ref, Coord_t const& _size) : Base(_ref), size_(_size) {}
    85 
    88  bool good() const { return ref().good(); }
    89 
    92  T* data() const { return ref().data(); }
    93 
    96  void reset(TensorRef_t const& _ref = TensorRef_t(0), Coord_t const& _size = Coord_t()) {
    97  Base::operator=(_ref);
    98  size_ = _size;
    99  }
    100 
    103  TensorRef_t& ref() { return *this; }
    104 
    108 
    111  TensorRef_t const& ref() const { return *this; }
    112 
    115  Coord_t const& size() const { return size_; }
    116 
    119  int size(int dim) const { return size_.at(dim); }
    120 
    123  Coord_t const& stride() const { return ref().stride(); }
    124 
    127  int const& stride(int dim) const { return ref().stride(dim); }
    128 
    131  TensorView& operator=(TensorView const& _tensor) {
    132  Base::operator=(_tensor._ref);
    133  size_ = _tensor.size_;
    134  return *this;
    135  }
    136 
    139  Offset_t offset(Coord_t const& coord) const { return ref().offset(coord); }
    140 
    143  bool contains(Coord_t const& coord) const {
    144  for (int dim = 0; dim < Rank; ++dim) {
    145  if (coord.at(dim) >= size_.at(dim)) {
    146  return false;
    147  }
    148  }
    149  return true;
    150  }
    151 
    154  T& at(Coord_t const& coord) const { return ref().at(coord); }
    155 
    157  T& operator[](Coord<Rank> const& coord) const { return at(coord); }
    158 
    161  T& at(Offset_t idx) const { return ref().at(idx); }
    162 
    165  TensorView<T> subview(Coord_t const& location, Coord_t size) const {
    166  return TensorView<T>(ref() + location, size.clamp(size_ - location));
    167  }
    168 };
    169 
    171 
    172 } // namespace cutlass
    CUTLASS_HOST_DEVICE TensorRef_t const & ref() const
    Accesses the tensor reference pointing to data.
    Definition: tensor_view.h:111
    +
    Definition: convert.h:33
    +
    Defines a structure containing strides and a pointer to tensor data.
    +
    CUTLASS_HOST_DEVICE Storage & at(Coord< Rank > const &coord) const
    Returns a reference to the element at a given Coord.
    Definition: tensor_ref.h:121
    +
    int Offset_t
    Type used to compute the offset of an element to the base of a tensor.
    Definition: tensor_view.h:57
    +
    static int const Rank
    Rank of tensor.
    Definition: tensor_ref.h:48
    +
    CUTLASS_HOST_DEVICE TensorView()
    Default constructor.
    Definition: tensor_view.h:80
    +
    CUTLASS_HOST_DEVICE int size(int dim) const
    Accesses the size.
    Definition: tensor_view.h:119
    +
    CUTLASS_HOST_DEVICE Coord & clamp(Coord< N > const &max, Coord< N > const &min=Coord< N >())
    Clamps a coordinate to a range specified by maximum and minimum values.
    Definition: coord.h:219
    +
    Coord< Rank > Coord_t
    Coordinate into tensor.
    Definition: tensor_view.h:60
    +
    CUTLASS_HOST_DEVICE void reset(TensorRef_t const &_ref=TensorRef_t(0), Coord_t const &_size=Coord_t())
    Updates the reference and size of a TensorView object.
    Definition: tensor_view.h:96
    +
    CUTLASS_HOST_DEVICE bool contains(Coord_t const &coord) const
    Determines whether a location is within a tensor.
    Definition: tensor_view.h:143
    +
    CUTLASS_HOST_DEVICE int const & stride(int dim) const
    Accesses the stride.
    Definition: tensor_view.h:127
    +
    static int const Rank
    Rank of tensor.
    Definition: tensor_view.h:54
    +
    CUTLASS_HOST_DEVICE T & at(Offset_t idx) const
    Element-wise accessor.
    Definition: tensor_view.h:161
    +
    CUTLASS_HOST_DEVICE ConstTensorRef_t const_ref()
    Definition: tensor_view.h:107
    +
    CUTLASS_HOST_DEVICE Storage * data() const
    Returns the pointer to referenced data.
    Definition: tensor_ref.h:99
    +
    Host-side reference implementation of tensor operations.
    Definition: tensor_view.h:42
    +
    CUTLASS_HOST_DEVICE long long offset(Coord< Rank > const &coord) const
    Computes the offset of an index from the origin of the tensor.
    Definition: tensor_ref.h:115
    +
    Structure modeling a pointer and stride into a tensor.
    Definition: tensor_ref.h:42
    +
    TensorRef< T, 4 > Base
    Reference and stride.
    Definition: tensor_view.h:45
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    CUTLASS_HOST_DEVICE bool good() const
    Returns true if the TensorView is bound to some memory.
    Definition: tensor_view.h:88
    +
    CUTLASS_HOST_DEVICE bool good() const
    Returns true if the TensorRef may be safely accessed.
    Definition: tensor_ref.h:95
    +
    CUTLASS_HOST_DEVICE Offset_t offset(Coord_t const &coord) const
    Returns the index of an element.
    Definition: tensor_view.h:139
    +
    CUTLASS_HOST_DEVICE T * data() const
    Returns a pointer to data.
    Definition: tensor_view.h:92
    + +
    T & operator[](Coord< Rank > const &coord) const
    Element-wise accessor.
    Definition: tensor_view.h:157
    +
    Base TensorRef_t
    Reference and stride.
    Definition: tensor_view.h:48
    +
    CUTLASS_HOST_DEVICE int & at()
    Gets the index of a given Coord element.
    Definition: coord.h:185
    +
    CUTLASS_HOST_DEVICE T & at(Coord_t const &coord) const
    Element-wise accessor.
    Definition: tensor_view.h:154
    +
    CUTLASS_HOST_DEVICE Coord_t const & size() const
    Accesses the size.
    Definition: tensor_view.h:115
    +
    CUTLASS_HOST_DEVICE Coord_t const & stride() const
    Accesses the stride.
    Definition: tensor_view.h:123
    +
    CUTLASS_HOST_DEVICE TensorRef_t & ref()
    Accesses the tensor reference pointing to data.
    Definition: tensor_view.h:103
    +
    CUTLASS_HOST_DEVICE Coord< Rank > const & stride() const
    Returns the stride of the tensor.
    Definition: tensor_ref.h:103
    +
    CUTLASS_HOST_DEVICE TensorView & operator=(TensorView const &_tensor)
    Assigns the TensorView.
    Definition: tensor_view.h:131
    +
    Basic include for CUTLASS macros.
    +
    CUTLASS_HOST_DEVICE TensorView(TensorRef_t const &_ref, Coord_t const &_size)
    Constructs a TensorView from a TensorRef and size.
    Definition: tensor_view.h:84
    +
    TensorRef< T const, 4 > ConstTensorRef_t
    Reference to constant type.
    Definition: tensor_view.h:51
    +
    CUTLASS_HOST_DEVICE TensorView< T > subview(Coord_t const &location, Coord_t size) const
    Returns a TensorView given location and size quantities.
    Definition: tensor_view.h:165
    +
    + + + + diff --git a/docs/generated-html/thread__multiply__add_8h.html b/docs/generated-html/thread__multiply__add_8h.html new file mode 100644 index 00000000..ab673590 --- /dev/null +++ b/docs/generated-html/thread__multiply__add_8h.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: thread_multiply_add.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    thread_multiply_add.h File Reference
    +
    +
    + +

    Template implementing matrix multiply-add operations on fragments. +More...

    +
    #include <cutlass/fragment.h>
    +
    +

    Go to the source code of this file.

    + + + + + +

    +Classes

    struct  cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
     Template performing matrix multiply-add operation within a thread. More...
     
    + + + + + +

    +Namespaces

     cutlass
     
     cutlass::gemm
     
    +
    + + + + diff --git a/docs/generated-html/thread__multiply__add_8h_source.html b/docs/generated-html/thread__multiply__add_8h_source.html new file mode 100644 index 00000000..45c64172 --- /dev/null +++ b/docs/generated-html/thread__multiply__add_8h_source.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: thread_multiply_add.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    thread_multiply_add.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/fragment.h>
    31 
    32 namespace cutlass {
    33 namespace gemm {
    34 
    36 
    38 template <typename AccumulatorsPerThread_,
    39  typename ThreadsPerWarp_,
    40  typename ScalarA_,
    41  typename ScalarB_,
    42  typename ScalarC_>
    47  typedef AccumulatorsPerThread_ AccumulatorsPerThread;
    49  typedef ThreadsPerWarp_ ThreadsPerWarp;
    53  typedef ScalarA_ ScalarA;
    57  typedef ScalarB_ ScalarB;
    61  typedef ScalarC_ ScalarC;
    64 
    66  CUTLASS_DEVICE ThreadMultiplyAdd() {}
    67 
    69  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
    70  FragmentB const& b,
    71  Accumulators const& c,
    72  Accumulators& d) {
    73  for (int j = 0; j < AccumulatorsPerThread::kH; ++j) {
    74  for (int i = 0; i < AccumulatorsPerThread::kW; ++i) {
    75  d[j * AccumulatorsPerThread::kW + i] = a[i] * b[j] + c[j * AccumulatorsPerThread::kW + i];
    76  }
    77  }
    78  }
    79 };
    80 
    82 
    83 } // namespace gemm
    84 } // namespace cutlass
    Definition: convert.h:33
    +
    ThreadsPerWarp_ ThreadsPerWarp
    The number of threads per warp.
    Definition: thread_multiply_add.h:49
    +
    Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
    Definition: shape.h:119
    +
    A template defining Fragment Concept.
    Definition: fragment.h:99
    +
    Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
    The fragment for A.
    Definition: thread_multiply_add.h:55
    +
    CUTLASS_DEVICE void multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
    Multiply : d = a*b + c.
    Definition: thread_multiply_add.h:69
    +
    Shape< 1, 1, 1, 1 > InstructionShape
    The shape of the instruction.
    Definition: thread_multiply_add.h:45
    +
    ScalarC_ ScalarC
    The type for C and D.
    Definition: thread_multiply_add.h:61
    +
    CUTLASS_DEVICE ThreadMultiplyAdd()
    Ctor.
    Definition: thread_multiply_add.h:66
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    AccumulatorsPerThread_ AccumulatorsPerThread
    The number of accumulators per thread.
    Definition: thread_multiply_add.h:47
    +
    ScalarB_ ScalarB
    The type for B.
    Definition: thread_multiply_add.h:57
    +
    Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW, 16 > Accumulators
    The accumulators.
    Definition: thread_multiply_add.h:63
    +
    Template performing matrix multiply-add operation within a thread.
    Definition: thread_multiply_add.h:43
    +
    ScalarA_ ScalarA
    The type for A.
    Definition: thread_multiply_add.h:53
    +
    Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
    The fragment for B.
    Definition: thread_multiply_add.h:59
    +
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
    +
    ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
    The number of accumulators per warp.
    Definition: thread_multiply_add.h:51
    +
    + + + + diff --git a/docs/generated-html/tile_8h.html b/docs/generated-html/tile_8h.html new file mode 100644 index 00000000..76aeb22d --- /dev/null +++ b/docs/generated-html/tile_8h.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: tile.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    tile.h File Reference
    +
    +
    + +

    Defines a type for restructuring a tile. +More...

    +
    #include <cutlass/shape.h>
    +
    +

    Go to the source code of this file.

    + + + + + + +

    +Classes

    struct  cutlass::ReshapeTile< Tile_, kAccessSize_, bool >
     
    struct  cutlass::ReshapeTile< Tile_, kAccessSize_, true >
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/tile_8h_source.html b/docs/generated-html/tile_8h_source.html new file mode 100644 index 00000000..33597e6a --- /dev/null +++ b/docs/generated-html/tile_8h_source.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: tile.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    tile.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/shape.h>
    31 
    32 namespace cutlass {
    33 
    35 
    36 // The following functor reshapes a tile of data. The goal is to have at least kAccessSize in
    37 // the inner-most dimension. If the user respects that constraint, there is nothing to be done. If
    38 // that's not the case, this functor will correct that and "extract" the right number of elements
    39 // from the next dimension.
    40 
    41 template <typename Tile_, int kAccessSize_, bool = (Tile_::kC < kAccessSize_)>
    42 struct ReshapeTile {
    43  typedef Tile_ Tile;
    44 };
    45 
    46 template <typename Tile_, int kAccessSize_>
    48  // Make sure the W dimension of the tile is large enough.
    49  static_assert(Tile_::kW >= kAccessSize_, "The W dimension is too small");
    50  // Make sure the dimension can be divided by the number of scalars.
    51  static_assert(Tile_::kW % kAccessSize_ == 0, "Not supported");
    52  // Collapse the W dimension.
    53  typedef Shape<Tile_::kD, Tile_::kH, Tile_::kW / kAccessSize_, kAccessSize_> Tile;
    54 };
    55 
    57 
    58 } // namespace cutlass
    Definition: convert.h:34
    + +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:63
    +
    Shape< Tile_::kD, Tile_::kH, Tile_::kW/kAccessSize_, kAccessSize_ > Tile
    Definition: tile.h:49
    +
    #define static_assert(__e, __m)
    Definition: nv_std.h:167
    +
    Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
    +
    + + + + diff --git a/docs/generated-html/tile__iterator_8h.html b/docs/generated-html/tile__iterator_8h.html new file mode 100644 index 00000000..2b778c19 --- /dev/null +++ b/docs/generated-html/tile__iterator_8h.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: tile_iterator.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    tile_iterator.h File Reference
    +
    +
    + +

    Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently. +More...

    + +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::IteratorAdvance
     Specifies dimension in which post-increment accesses advance. More...
     
    struct  cutlass::IteratorFragment
     Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. More...
     
    struct  cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >
     A template defining Tile Traits Concept. More...
     
    struct  cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
     Iterator for accessing a stripmined tile in memory. More...
     
    struct  cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
     Parameters to the iterator. More...
     
    struct  cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
     An iterator implementing Tile Load Iterator Concept for loading a tile from memory. More...
     
    struct  cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
     Parameters. More...
     
    struct  cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
     An iterator implementing Tile Store Iterator Concept for storing a tile to memory. More...
     
    struct  cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
     Parameters. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/tile__iterator_8h_source.html b/docs/generated-html/tile__iterator_8h_source.html new file mode 100644 index 00000000..69ef0b17 --- /dev/null +++ b/docs/generated-html/tile__iterator_8h_source.html @@ -0,0 +1,246 @@ + + + + + + + +Cutlass: tile_iterator.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    tile_iterator.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cutlass/fragment.h>
    32 #include <cutlass/load_store.h>
    34 #include <cutlass/vector.h>
    35 
    36 namespace cutlass {
    37 
    39 
    58 
    62  enum Kind { kD, kH, kW };
    63 };
    64 
    68 };
    69 
    71 
    76 template <typename Tile_, typename Delta_, typename Iterations_, typename ThreadOffset_>
    77 struct TileTraits {
    79  typedef Tile_ Tile;
    80 
    82  typedef Delta_ Delta;
    83 
    85  typedef Iterations_ Iterations;
    86 
    88  typedef ThreadOffset_ ThreadOffset;
    89 };
    90 
    92 
    94 template <typename Traits_,
    95  typename Scalar_,
    98  typename Index_ = int,
    99  typename FragmentElement_ = Scalar_,
    101  typename Skew_ = Shape<0, 0, 0, 0> >
    104  typedef Traits_ Traits;
    105 
    107  typedef Scalar_ Scalar;
    108 
    110  typedef FragmentElement_ FragmentElement;
    111 
    113  static IteratorAdvance::Kind const kAdvance = Advance_;
    114 
    116  static IteratorFragment::Kind const kIteratorFragment = IteratorFragment_;
    117 
    120 
    122  typedef Index_ Index;
    123 
    125  typedef Skew_ Skew;
    126 
    128  typedef typename Traits::Tile Tile;
    129 
    131  typedef typename Traits::Delta Delta;
    132 
    134  typedef typename Traits::ImmediateOffsetStrides ImmediateOffsetStrides;
    135 
    137  typedef typename Traits::Iterations Iterations;
    138 
    140  typedef typename Traits::ThreadOffset ThreadOffset;
    141 
    143  static int const kAccessSize = Tile::kC;
    144 
    147 
    149  static int const kFragmentSize =
    161 
    164 
    165  //
    166  // Params struct
    167  //
    168 
    170  struct Params {
    174 
    178 
    180 
    183  int initialize(Index _stride_d,
    184  Index _stride_h,
    185  Index _stride_w,
    186  Index _inc_d,
    187  Index _inc_h,
    188  Index _inc_w,
    189  Index _inc_advance) {
    190  stride_d = _stride_d;
    191  stride_h = _stride_h;
    192  stride_w = _stride_w;
    193 
    194  inc_d = _inc_d;
    195  inc_h = _inc_h;
    196  inc_w = _inc_w;
    197  inc_advance = _inc_advance;
    198 
    199  return 0;
    200  }
    201 
    203  int initialize(Index _stride_d, Index _stride_h, Index _stride_w) {
    204  stride_d = _stride_d;
    205  stride_h = _stride_h;
    206  stride_w = _stride_w;
    207 
    208  inc_w = stride_w * Delta::kW;
    209  inc_h = stride_h * Delta::kH - stride_w * Delta::kW * (Iterations::kW - 1);
    210 
    211  if (kAdvance == IteratorAdvance::kH) {
    212  // Advance in the H dimension.
    213  inc_d = 0;
    214  } else if (kAdvance == IteratorAdvance::kW) {
    215  // Advance in the W dimension.
    216  inc_d = stride_w * Tile::kW - stride_h * Tile::kH;
    217  } else {
    218  // Advance in the D dimension.
    219  inc_d = stride_d;
    220  }
    221 
    222  inc_advance = 0;
    223 
    224  return 0;
    225  }
    226 
    228  stride_d = 0;
    229  stride_h = 0;
    230  stride_w = 1;
    231 
    232  inc_d = inc_h = inc_w = inc_advance = 0;
    233 
    234  return 0;
    235  }
    236  };
    237 
    239  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; }
    240 
    241  //
    242  // Static function members
    243  //
    244 
    246  template <typename PredicateIterator>
    247  CUTLASS_DEVICE static void initialize_predicates(PredicateIterator predicate_it,
    248  Coord<3> const &bounds,
    249  Coord<3> const &offset = make_Coord(0, 0, 0)) {
    250  for (int d = 0; d < Iterations::kD; ++d) {
    251  bool enable_d = (d * Delta::kD + offset[0] < bounds[0]);
    252  for (int h = 0; h < Iterations::kH; ++h) {
    253  bool enable_h = (h * Delta::kH + offset[1] < bounds[1]);
    254  for (int w = 0; w < Iterations::kW; ++w) {
    255  bool enable_w = (w * Tile::kC * Delta::kW + offset[2] < bounds[2]);
    256  predicate_it.set(d, h, w, 0, enable_d && enable_h && enable_w);
    257  }
    258  }
    259  }
    260  }
    261 };
    262 
    264 
    288 
    294 template <typename Traits_,
    295  typename Scalar_,
    298  typename Index_ = int,
    299  typename FragmentElement_ = Scalar_,
    301  typename Skew_ = Shape<0, 0, 0, 0> >
    302 struct TileLoadIterator : public TileIteratorBase<Traits_,
    303  Scalar_,
    304  Advance_,
    305  MemorySpace,
    306  Index_,
    307  FragmentElement_,
    308  IteratorFragment_,
    309  Skew_> {
    311  typedef TileIteratorBase<Traits_,
    312  Scalar_,
    313  Advance_,
    314  MemorySpace,
    315  Index_,
    316  FragmentElement_,
    317  IteratorFragment_,
    318  Skew_>
    320 
    322  typedef typename Base::Traits Traits;
    323 
    325  typedef typename Base::Scalar Scalar;
    326 
    329 
    332 
    335 
    338 
    340  typedef typename Base::Index Index;
    341 
    343  typedef typename Base::Skew Skew;
    344 
    346  typedef typename Base::Tile Tile;
    347 
    349  typedef typename Base::Delta Delta;
    350 
    352  typedef typename Base::Iterations Iterations;
    353 
    356 
    359 
    361  typedef typename Base::AccessType AccessType;
    362 
    364  typedef typename Base::Fragment Fragment;
    365 
    368 
    371 
    374 
    376  typedef typename Base::Storage SharedStorage;
    377 
    379  typedef typename Base::Params BaseParams;
    380 
    382  enum { kRequiresLoadFence = Tile::kD == 1 };
    383 
    385  typedef Scalar const *Pointer;
    386 
    388  struct Params : public BaseParams {
    390  Scalar const *pointer;
    391 
    394  int initialize(SharedStorage const &storage) {
    395  pointer = &storage[0];
    396  return 0;
    397  }
    398 
    403  pointer = ptr;
    404  return 0;
    405  }
    406 
    409  int initialize(Scalar const *ptr,
    410  Index _stride_d,
    411  Index _stride_h,
    412  Index _stride_w,
    413  Index _inc_d,
    414  Index _inc_h,
    415  Index _inc_w,
    416  Index _inc_advance) {
    417  pointer = ptr;
    419  _stride_d, _stride_h, _stride_w, _inc_d, _inc_h, _inc_w, _inc_advance);
    420  return 0;
    421  }
    422 
    423  // Initializes params to default values
    426  };
    427 
    428  //
    429  // Data members
    430  //
    431 
    433  Params params;
    434 
    437 
    439  int stage;
    440 
    441  //
    442  // Static member functions
    443  //
    444 
    446  template <typename PredicateIterator>
    447  CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it,
    448  Coord<3> const &bounds,
    449  Coord<3> const &block_offset = make_Coord(0,
    450  0,
    451  0)) {
    453  predicate_it,
    454  bounds,
    455  block_offset + make_Coord(0, thread_offset[1], thread_offset[2] * Tile::kC));
    456  }
    457 
    458  //
    459  // Methods
    460  //
    461 
    465 
    468  TileLoadIterator(Params const &_params,
    469  Coord<3> const &block_offset = make_Coord(0, 0, 0),
    470  ThreadOffset thread_offset_func = ThreadOffset())
    471  : params(_params), stage(0) {
    472  thread_offset = thread_offset_func();
    473 
    474  Index block_offset_h = 0;
    475  Index block_offset_w = 0;
    476  if (kAdvance == IteratorAdvance::kH) {
    477  block_offset_h = block_offset[1];
    478  block_offset_w = block_offset[2];
    479  } else {
    480  block_offset_h = block_offset[2];
    481  block_offset_w = block_offset[1];
    482  }
    483 
    484  params.pointer += block_offset[0] * params.stride_d +
    485  (block_offset_h + thread_offset[1]) * params.stride_h +
    486  (block_offset_w + thread_offset[2] * Tile::kC) / Tile::kC * params.stride_w;
    487  }
    488 
    491  TileLoadIterator(Params const &,
    492  SharedStorage &shared_storage,
    493  Coord<3> const &block_offset = make_Coord(0, 0, 0),
    494  ThreadOffset thread_offset_func = ThreadOffset())
    495  : stage(0) {
    496  int const offset = thread_offset_func()[2];
    497  params.pointer = &shared_storage[offset];
    498  }
    499 
    502  Scalar const *data() const { return params.pointer; }
    503 
    506 
    509 
    512 
    515 
    517  CUTLASS_DEVICE void inc_stage() {
    518  if (Tile::kD > 1) {
    519  int const kStageSize = Tile::kH * Tile::kW * Tile::kC;
    520  if (stage == Tile::kD - 1) {
    521  params.pointer -= (Tile::kD - 1) * kStageSize;
    522  stage = 0;
    523  } else {
    524  params.pointer += kStageSize;
    525  stage = stage + 1;
    526  }
    527  }
    528  }
    529 
    530  public:
    532  template <typename Fragment, typename PredicateIterator>
    533  CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment, PredicateIterator pred_it) {
    534  FragmentIterator frag_iterator(fragment);
    535 
    536  for (int d = 0; d < Iterations::kD; ++d) {
    537  for (int h = 0; h < Iterations::kH; ++h) {
    538  for (int w = 0; w < Iterations::kW; ++w, ++pred_it) {
    539  if (*pred_it) {
    541  reinterpret_cast<AccessType &>(frag_iterator.at(d, h, w, 0)), data(), 0);
    542  }
    543 
    544  if (w < Iterations::kW - 1) {
    545  inc_w();
    546  }
    547  }
    548  if (h < Iterations::kH - 1) {
    549  inc_h();
    550  }
    551  }
    552  if (d < Iterations::kD - 1) {
    553  inc_d();
    554  }
    555  }
    556  inc_advance();
    557  }
    558 
    560  template <typename Fragment>
    562  typename PredicateVector::TrivialIterator pred_it;
    563  load_post_increment(fragment, pred_it);
    564  }
    565 
    567  template <typename Fragment, typename PredicateIterator>
    568  CUTLASS_HOST_DEVICE void load(Fragment &fragment, PredicateIterator pred_it) const {
    569  TileLoadIterator _load_it(*this);
    570  _load_it.load_post_increment(fragment, pred_it);
    571  }
    572 
    574  template <typename Fragment>
    575  CUTLASS_HOST_DEVICE void load(Fragment &fragment) const {
    576  typename PredicateVector::TrivialIterator pred_it;
    577  load(fragment, pred_it);
    578  }
    579 };
    580 
    582 
    606 
    612 template <typename Traits_,
    613  typename Scalar_,
    616  typename Index_ = int,
    617  typename FragmentElement_ = Scalar_,
    619  typename Skew_ = Shape<0, 0, 0, 0> >
    620 struct TileStoreIterator : public TileIteratorBase<Traits_,
    621  Scalar_,
    622  Advance_,
    623  MemorySpace,
    624  Index_,
    625  FragmentElement_,
    626  IteratorFragment_,
    627  Skew_> {
    629  typedef TileIteratorBase<Traits_,
    630  Scalar_,
    631  Advance_,
    632  MemorySpace,
    633  Index_,
    634  FragmentElement_,
    635  IteratorFragment_,
    636  Skew_>
    638 
    640  typedef typename Base::Traits Traits;
    641 
    643  typedef typename Base::Scalar Scalar;
    644 
    647 
    650 
    653 
    656 
    658  typedef typename Base::Index Index;
    659 
    661  typedef typename Base::Skew Skew;
    662 
    664  typedef typename Base::Tile Tile;
    665 
    667  typedef typename Base::Delta Delta;
    668 
    670  typedef typename Base::Iterations Iterations;
    671 
    674 
    677 
    679  typedef typename Base::AccessType AccessType;
    680 
    682  typedef typename Base::Fragment Fragment;
    683 
    686 
    689 
    692 
    694  typedef typename Base::Storage SharedStorage;
    695 
    697  typedef typename Base::Params BaseParams;
    698 
    700  struct Params : public BaseParams {
    703 
    706  int initialize(SharedStorage &storage) {
    707  pointer = &storage[0];
    708  return 0;
    709  }
    710 
    715  pointer = ptr;
    716  return 0;
    717  }
    718 
    721  int initialize(Scalar *ptr,
    722  Index _stride_d,
    723  Index _stride_h,
    724  Index _stride_w,
    725  Index _inc_d,
    726  Index _inc_h,
    727  Index _inc_w,
    728  Index _inc_advance) {
    729  pointer = ptr;
    731  _stride_d, _stride_h, _stride_w, _inc_d, _inc_h, _inc_w, _inc_advance);
    732  return 0;
    733  }
    734 
    738  };
    739 
    740  //
    741  // Data members
    742  //
    743 
    746 
    749 
    751  int stage;
    752 
    753  //
    754  // Static member functions
    755  //
    756 
    758  template <typename PredicateIterator>
    759  CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it,
    760  Coord<3> const &bounds,
    761  Coord<3> const &block_offset = make_Coord(0,
    762  0,
    763  0)) {
    765  predicate_it,
    766  bounds,
    767  block_offset + make_Coord(0, thread_offset[1], thread_offset[2] * Tile::kC));
    768  }
    769 
    770  //
    771  // Methods
    772  //
    773 
    777 
    780  TileStoreIterator(Params const &_params,
    781  Coord<3> const &block_offset = make_Coord(0, 0, 0),
    782  ThreadOffset thread_offset_func = ThreadOffset())
    783  : params(_params), stage(0) {
    784  thread_offset = thread_offset_func();
    785 
    786  params.pointer += block_offset[0] * params.stride_d +
    787  (block_offset[1] + thread_offset[1]) * params.stride_h +
    788  (block_offset[2] + thread_offset[2] * Tile::kC) / Tile::kC * params.stride_w;
    789  }
    790 
    794  SharedStorage &shared_storage,
    795  Coord<3> const &block_offset = make_Coord(0, 0, 0),
    796  ThreadOffset thread_offset_func = ThreadOffset())
    797  : stage(0) {
    798  int const offset = thread_offset_func()[2];
    799  params.pointer = &shared_storage[offset];
    800  }
    801 
    804  Scalar *data() const { return params.pointer; }
    805 
    808 
    811 
    814 
    817 
    819  CUTLASS_DEVICE void inc_stage() {
    820  if (Tile::kD > 1) {
    821  int const kStageSize = Tile::kH * Tile::kW * Tile::kC;
    822  if (stage == Tile::kD - 1) {
    823  params.pointer -= (Tile::kD - 1) * kStageSize;
    824  stage = 0;
    825  } else {
    826  params.pointer += kStageSize;
    827  stage = stage + 1;
    828  }
    829  }
    830  }
    831 
    832  public:
    834  template <typename Fragment, typename PredicateIterator>
    835  CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment, PredicateIterator pred_it) {
    836  FragmentIterator frag_iterator(fragment);
    837 
    838  for (int d = 0; d < Iterations::kD; ++d) {
    839  for (int h = 0; h < Iterations::kH; ++h) {
    840  for (int w = 0; w < Iterations::kW; ++w, ++pred_it) {
    841  if (*pred_it) {
    843  reinterpret_cast<AccessType &>(frag_iterator.at(d, h, w, 0)), data(), 0);
    844  }
    845  if (w < Iterations::kW - 1) {
    846  inc_w();
    847  }
    848  }
    849  if (h < Iterations::kH - 1) {
    850  inc_h();
    851  }
    852  }
    853  if (d < Iterations::kD - 1) {
    854  inc_d();
    855  }
    856  }
    857  inc_advance();
    858  }
    859 
    861  template <typename Fragment>
    863  typename PredicateVector::TrivialIterator pred_it;
    864  store_post_increment(fragment, pred_it);
    865  }
    866 
    868  template <typename Fragment, typename PredicateIterator>
    869  CUTLASS_HOST_DEVICE void store(Fragment &fragment, PredicateIterator pred_it) const {
    870  TileStoreIterator _store_it(*this);
    871  _store_it.store_post_increment(fragment, pred_it);
    872  }
    873 
    875  template <typename Fragment>
    876  CUTLASS_HOST_DEVICE void store(Fragment &fragment) const {
    877  typename PredicateVector::TrivialIterator pred_it;
    878  store(fragment, pred_it);
    879  }
    880 };
    881 }
    static int const kFragmentSize
    The size of storage needed per fragment.
    Definition: tile_iterator.h:149
    +
    static IteratorFragment::Kind const kIteratorFragment
    Specifies type of iterator fragment storage (Scalar or WmmaMatrix)
    Definition: tile_iterator.h:334
    +
    CUTLASS_HOST_DEVICE void inc_advance()
    Increment in the next dimension.
    Definition: tile_iterator.h:816
    +
    FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
    The fragment const iterator.
    Definition: tile_iterator.h:158
    +
    TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Base
    Base class.
    Definition: tile_iterator.h:637
    +
    Base::Fragment Fragment
    Fragment definition.
    Definition: tile_iterator.h:682
    +
    Base::FragmentIterator FragmentIterator
    Fragment iterator definition.
    Definition: tile_iterator.h:367
    +
    Definition: convert.h:33
    +
    Base::Tile Tile
    Tile shape.
    Definition: tile_iterator.h:346
    +
    CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment, PredicateIterator pred_it)
    Loads a fragment and advances the iterator to the next tile.
    Definition: tile_iterator.h:533
    +
    Base::ThreadOffset ThreadOffset
    ThreadOffset functor.
    Definition: tile_iterator.h:355
    +
    static IteratorAdvance::Kind const kAdvance
    Specifies in which dimension post-increment accesses advance.
    Definition: tile_iterator.h:649
    +
    FragmentIterator::FragmentShape FragmentShape
    The shape of the fragment.
    Definition: tile_iterator.h:160
    +
    Traits::ThreadOffset ThreadOffset
    Thread offset.
    Definition: tile_iterator.h:140
    +
    static IteratorFragment::Kind const kIteratorFragment
    Specifies type of iterator fragment storage (Scalar or WmmaMatrix)
    Definition: tile_iterator.h:652
    +
    Skew_ Skew
    Skew quantity.
    Definition: tile_iterator.h:125
    +
    Base::FragmentShape FragmentShape
    Fragment type.
    Definition: tile_iterator.h:676
    +
    CUTLASS_HOST_DEVICE int initialize(SharedStorage &storage)
    Initialize params to access storage object.
    Definition: tile_iterator.h:706
    +
    Enum to specify which memory space data resides in.
    Definition: load_store.h:39
    +
    Base::Skew Skew
    Skew quantity.
    Definition: tile_iterator.h:343
    +
    Kind
    Definition: tile_iterator.h:62
    +
    CUTLASS_HOST_DEVICE int initialize()
    Definition: tile_iterator.h:227
    +
    Base::Skew Skew
    Skew quantity.
    Definition: tile_iterator.h:661
    +
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 1-element coordinate.
    Definition: coord.h:241
    +
    Base::Storage SharedStorage
    Storage object which may be stored to.
    Definition: tile_iterator.h:694
    +
    A template defining Tile Traits Concept.
    Definition: tile_iterator.h:77
    +
    CUTLASS_HOST_DEVICE Scalar const * data() const
    Returns the current pointer.
    Definition: tile_iterator.h:502
    +
    TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Base
    Base class.
    Definition: tile_iterator.h:319
    +
    CUTLASS_HOST_DEVICE TileLoadIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
    Constructs a tile load iterator.
    Definition: tile_iterator.h:491
    +
    CUTLASS_HOST_DEVICE int initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)
    Initializes params to access a raw pointer.
    Definition: tile_iterator.h:401
    +
    Base::Params BaseParams
    IteratorBase parameters.
    Definition: tile_iterator.h:379
    +
    Params params
    Parameters structure.
    Definition: tile_iterator.h:745
    +
    static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The load function.
    Definition: load_store.h:59
    +
    CUTLASS_HOST_DEVICE int initialize(SharedStorage const &storage)
    Initialize params to access storage object.
    Definition: tile_iterator.h:394
    +
    Definition: tile_iterator.h:382
    +
    Base::Scalar Scalar
    Scalar element.
    Definition: tile_iterator.h:325
    +
    Base::AccessType AccessType
    Memory access type.
    Definition: tile_iterator.h:361
    +
    Definition: tile_iterator.h:62
    +
    CUTLASS_HOST_DEVICE void store(Fragment &fragment, PredicateIterator pred_it) const
    Stores a fragment without advancing the iterator.
    Definition: tile_iterator.h:869
    +
    static IteratorAdvance::Kind const kAdvance
    Specifies in which dimension post-increment accesses advance.
    Definition: tile_iterator.h:331
    +
    CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment)
    Loads a fragment and advances the iterator to the next tile.
    Definition: tile_iterator.h:561
    +
    CUTLASS_HOST_DEVICE TileLoadIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
    Constructs a tile load iterator.
    Definition: tile_iterator.h:468
    +
    CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
    Is the iterator valid?
    Definition: tile_iterator.h:239
    +
    Iterations_ Iterations
    Number of accesses performed.
    Definition: tile_iterator.h:85
    +
    CUTLASS_HOST_DEVICE int initialize(Scalar *ptr, Index stride_d, Index stride_h, Index stride_w)
    Initializes params to access a raw pointer.
    Definition: tile_iterator.h:713
    +
    Params params
    Parameters structure.
    Definition: tile_iterator.h:433
    +
    Iterator that always returns true.
    Definition: predicate_vector.h:308
    +
    Base::Scalar Scalar
    Scalar element.
    Definition: tile_iterator.h:643
    +
    Base::Traits Traits
    concept TileTraits
    Definition: tile_iterator.h:640
    +
    Kind
    Definition: load_store.h:40
    +
    Index stride_h
    Definition: tile_iterator.h:172
    +
    CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment)
    Stores a fragment and advances to the next tile.
    Definition: tile_iterator.h:862
    +
    Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
    The storage.
    Definition: tile_iterator.h:152
    +
    CUTLASS_HOST_DEVICE int initialize()
    Definition: tile_iterator.h:425
    +
    CUTLASS_HOST_DEVICE void inc_d()
    Increment in the D dimension.
    Definition: tile_iterator.h:807
    +
    Base::Iterations Iterations
    Iterations.
    Definition: tile_iterator.h:352
    +
    CUTLASS_HOST_DEVICE int initialize()
    Initializes params to default values.
    Definition: tile_iterator.h:737
    +
    Base::FragmentConstIterator FragmentConstIterator
    Fragment const iterator definition.
    Definition: tile_iterator.h:370
    +
    Index_ Index
    Index type.
    Definition: tile_iterator.h:122
    +
    static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:136
    +
    Index inc_h
    Definition: tile_iterator.h:176
    +
    Defines container classes and iterators for managing a statically sized vector of boolean predicates...
    +
    Base::Storage SharedStorage
    Storage object that may be loaded from.
    Definition: tile_iterator.h:376
    +
    Parameters.
    Definition: tile_iterator.h:700
    +
    CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
    Initializes a predicate vector.
    Definition: tile_iterator.h:759
    +
    An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
    Definition: tile_iterator.h:302
    +
    Base::Traits Traits
    concept TileTraits
    Definition: tile_iterator.h:322
    +
    static CUTLASS_DEVICE void initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
    Initializes a predicate vector.
    Definition: tile_iterator.h:247
    +
    Base::Params BaseParams
    IteratorBase parameters.
    Definition: tile_iterator.h:697
    +
    Base::FragmentElement FragmentElement
    Fragment element.
    Definition: tile_iterator.h:328
    +
    Traits::Tile Tile
    Tile shape.
    Definition: tile_iterator.h:128
    +
    FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
    The fragment iterator.
    Definition: tile_iterator.h:156
    +
    int stage
    The stage.
    Definition: tile_iterator.h:751
    +
    CUTLASS_HOST_DEVICE int initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
    Initializes params.
    Definition: tile_iterator.h:183
    +
    Base::AccessType AccessType
    Memory access type.
    Definition: tile_iterator.h:679
    +
    Base::FragmentElement FragmentElement
    Fragment element.
    Definition: tile_iterator.h:646
    +
    Definition: load_store.h:41
    +
    CUTLASS_DEVICE void inc_stage()
    Increment the stage.
    Definition: tile_iterator.h:517
    +
    Kind
    Definition: tile_iterator.h:67
    +
    Base::PredicateVector PredicateVector
    Default predicate mask type.
    Definition: tile_iterator.h:373
    +
    CUTLASS_HOST_DEVICE int initialize(Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
    Initializes params.
    Definition: tile_iterator.h:409
    +
    CUTLASS_HOST_DEVICE int initialize(Scalar *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
    Initializes params.
    Definition: tile_iterator.h:721
    +
    FragmentElement_ FragmentElement
    Fragment element.
    Definition: tile_iterator.h:110
    +
    Base::Index Index
    Index type.
    Definition: tile_iterator.h:658
    +
    Scalar * pointer
    Pointer to memory.
    Definition: tile_iterator.h:702
    +
    Index inc_advance
    Definition: tile_iterator.h:179
    +
    Definition: tile_iterator.h:67
    +
    ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
    The shape of the fragment.
    Definition: fragment.h:185
    +
    Index stride_w
    Definition: tile_iterator.h:173
    +
    CUTLASS_HOST_DEVICE TileLoadIterator()
    Default constructor.
    Definition: tile_iterator.h:464
    +
    Defines abstractions for efficiently loading and storing vectors to memory.
    +
    Scalar const * pointer
    Pointer to memory.
    Definition: tile_iterator.h:390
    +
    CUTLASS_HOST_DEVICE TileStoreIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
    Constructs a tile store iterator.
    Definition: tile_iterator.h:780
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    Coord< 4 > thread_offset
    Offset of an individual lane from the start of the tile.
    Definition: tile_iterator.h:748
    +
    Traits::Iterations Iterations
    Iterations.
    Definition: tile_iterator.h:137
    +
    static int const kAccessSize
    The number of scalars accessed per load/store.
    Definition: tile_iterator.h:143
    +
    Tile_ Tile
    Shape of the tile.
    Definition: tile_iterator.h:79
    +
    Delta_ Delta
    Number of steps between accesses along each dimension.
    Definition: tile_iterator.h:82
    +
    CUTLASS_HOST_DEVICE int initialize(Index _stride_d, Index _stride_h, Index _stride_w)
    Definition: tile_iterator.h:203
    +
    Index stride_d
    Definition: tile_iterator.h:171
    +
    CUTLASS_HOST_DEVICE void inc_advance()
    Increment in the next dimension.
    Definition: tile_iterator.h:514
    +
    Definition: vector.h:61
    +
    Base::Delta Delta
    Delta.
    Definition: tile_iterator.h:349
    +
    Base::Tile Tile
    Tile shape.
    Definition: tile_iterator.h:664
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    Base::FragmentShape FragmentShape
    Fragment type.
    Definition: tile_iterator.h:358
    +
    Specifies dimension in which post-increment accesses advance.
    Definition: tile_iterator.h:61
    +
    static MemorySpace::Kind const kMemorySpace
    Source or destination memory space.
    Definition: tile_iterator.h:655
    +
    CUTLASS_HOST_DEVICE void inc_w()
    Increment in the W dimension.
    Definition: tile_iterator.h:511
    +
    Statically-sized array specifying Coords within a tensor.
    Definition: coord.h:48
    +
    Traits::ImmediateOffsetStrides ImmediateOffsetStrides
    The strides in each dimension between different loads/stores.
    Definition: tile_iterator.h:134
    +
    Base::Fragment Fragment
    Fragment definition.
    Definition: tile_iterator.h:364
    +
    Base::Iterations Iterations
    Iterations.
    Definition: tile_iterator.h:670
    +
    Defines a 1D vector of elements held in the registers of each thread.
    +
    Iterator for accessing a stripmined tile in memory.
    Definition: tile_iterator.h:102
    +
    static IteratorFragment::Kind const kIteratorFragment
    Specifies iterator storage fragment type (Scalar or WmmaMatrix)
    Definition: tile_iterator.h:116
    +
    Base::Delta Delta
    Delta.
    Definition: tile_iterator.h:667
    +
    Definition: tile_iterator.h:62
    +
    CUTLASS_HOST_DEVICE Scalar * data() const
    Returns the current pointer.
    Definition: tile_iterator.h:804
    +
    ThreadOffset_ ThreadOffset
    Functor that returns the logical coordinate of each entity&#39;s initial offset in the tile...
    Definition: tile_iterator.h:88
    +
    Vectorize< FragmentElement, kAccessSize >::Type AccessType
    The elements loaded/store by one instruction.
    Definition: tile_iterator.h:146
    +
    CUTLASS_HOST_DEVICE void inc_d()
    Increment in the D dimension.
    Definition: tile_iterator.h:505
    +
    CUTLASS_HOST_DEVICE void inc_h()
    Increment in the H dimension.
    Definition: tile_iterator.h:810
    +
    CUTLASS_HOST_DEVICE void store(Fragment &fragment) const
    Stores a fragment without advancing the iterator.
    Definition: tile_iterator.h:876
    +
    CUTLASS_HOST_DEVICE void inc_h()
    Increment in the H dimension.
    Definition: tile_iterator.h:508
    +
    Parameters.
    Definition: tile_iterator.h:388
    +
    static MemorySpace::Kind const kMemorySpace
    Source or destination memory space.
    Definition: tile_iterator.h:337
    +
    Base::ThreadOffset ThreadOffset
    ThreadOffset functor.
    Definition: tile_iterator.h:673
    +
    static MemorySpace::Kind const kMemorySpace
    Source or destination memory space.
    Definition: tile_iterator.h:119
    +
    Base::FragmentIterator FragmentIterator
    Fragment iterator definition.
    Definition: tile_iterator.h:685
    +
    CUTLASS_HOST_DEVICE void load(Fragment &fragment, PredicateIterator pred_it) const
    Loads a fragment without advancing the iterator.
    Definition: tile_iterator.h:568
    +
    CUTLASS_HOST_DEVICE TileStoreIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
    Constructs a tile store iterator.
    Definition: tile_iterator.h:793
    +
    CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
    Initializes a predicate vector.
    Definition: tile_iterator.h:447
    +
    Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
    The fragment.
    Definition: tile_iterator.h:154
    +
    CUTLASS_HOST_DEVICE void load(Fragment &fragment) const
    Loads a fragment without advancing the iterator.
    Definition: tile_iterator.h:575
    +
    Definition: tile_iterator.h:62
    +
    static IteratorAdvance::Kind const kAdvance
    Specifies dimension in which post-increment accesses advance.
    Definition: tile_iterator.h:113
    +
    Index inc_w
    Definition: tile_iterator.h:177
    +
    Coord< 4 > thread_offset
    Offset of an individual lane from the start of the tile.
    Definition: tile_iterator.h:436
    +
    Traits::Delta Delta
    Distance along each dimension.
    Definition: tile_iterator.h:131
    +
    int stage
    Stage argument enables wrapping after some number of tiles have been loaded.
    Definition: tile_iterator.h:439
    +
    Base::FragmentConstIterator FragmentConstIterator
    Fragment const iterator definition.
    Definition: tile_iterator.h:688
    +
    CUTLASS_HOST_DEVICE TileStoreIterator()
    Default constructor.
    Definition: tile_iterator.h:776
    +
    Base::PredicateVector PredicateVector
    Default predicate mask type.
    Definition: tile_iterator.h:691
    +
    Scalar const * Pointer
    The pointer type.
    Definition: tile_iterator.h:385
    +
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
    +
    Parameters to the iterator.
    Definition: tile_iterator.h:170
    +
    Base::Index Index
    Index type.
    Definition: tile_iterator.h:340
    +
    CUTLASS_DEVICE void inc_stage()
    Increment the stage.
    Definition: tile_iterator.h:819
    +
    CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment, PredicateIterator pred_it)
    Stores a fragment and advances to the next tile.
    Definition: tile_iterator.h:835
    +
    CUTLASS_HOST_DEVICE void inc_w()
    Increment in the W dimension.
    Definition: tile_iterator.h:813
    +
    PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
    Default predicate mask type.
    Definition: tile_iterator.h:163
    +
    Definition: tile_iterator.h:67
    +
    Scalar_ Scalar
    Scalar element.
    Definition: tile_iterator.h:107
    +
    Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix.
    Definition: tile_iterator.h:66
    +
    Index inc_d
    Definition: tile_iterator.h:175
    +
    An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
    Definition: tile_iterator.h:620
    +
    Traits_ Traits
    concept TileTraits
    Definition: tile_iterator.h:104
    +
    + + + + diff --git a/docs/generated-html/tile__traits__standard_8h.html b/docs/generated-html/tile__traits__standard_8h.html new file mode 100644 index 00000000..d45ace8c --- /dev/null +++ b/docs/generated-html/tile__traits__standard_8h.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: tile_traits_standard.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    tile_traits_standard.h File Reference
    +
    +
    + +

    Defines tile traits for several tile partitioning arrangements of threads expected to achieve efficient streaming performance. +More...

    + +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::TiledThreadOffset< ThreadShape >
     Basic thread offset function computed from a thread shape. More...
     
    struct  cutlass::TileTraitsStrideMajor< Tile_, Threads >
     
    struct  cutlass::TileTraitsContiguousMajor< Tile_, Threads >
     
    struct  cutlass::TileTraitsWarpRake< Tile_, Threads >
     Tiling in which warps rake across the contiguous dimension. More...
     
    struct  cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    struct  cutlass::TileTraitsStandard< Tile_, Threads >
     Chooses 'best' shape to enable warp raking along contiguous dimension if possible. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/tile__traits__standard_8h_source.html b/docs/generated-html/tile__traits__standard_8h_source.html new file mode 100644 index 00000000..ed4a1efa --- /dev/null +++ b/docs/generated-html/tile__traits__standard_8h_source.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: tile_traits_standard.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    tile_traits_standard.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cutlass/tile_iterator.h>
    32 
    33 namespace cutlass {
    34 
    36 
    38 template <typename ThreadShape>
    42  Coord<4> operator()() const {
    43  Coord<4> thread_offset;
    44 
    45  int index = threadIdx.x;
    46 
    47  thread_offset[3] = (index % ThreadShape::kC);
    48  index = (index / ThreadShape::kC);
    49 
    50  thread_offset[2] = (index % ThreadShape::kW);
    51  index = (index / ThreadShape::kW);
    52 
    53  thread_offset[1] = (index % ThreadShape::kH);
    54  index = (index / ThreadShape::kH);
    55 
    56  thread_offset[0] = index;
    57 
    58  return thread_offset;
    59  }
    60 };
    61 
    63 
    66 template <typename Tile_, int Threads>
    69  typedef Tile_ Tile;
    70 
    72  static int const kThreads = Threads;
    73 
    74  // Static assertions
    76  "Tiling undefined if elements not divisible by threads.");
    77 
    78  static_assert(Tile::kW <= kThreads,
    79  "This specialization assumes there are more threads than the contiguous dimension "
    80  "of the tile.");
    81 
    83  typedef Shape<1, kThreads / Tile::kW, Tile::kW, 1> ThreadShape;
    84 
    87 
    89  typedef Shape<1, Tile::kH / ThreadShape::kH, 1, 1> Iterations;
    90 
    93 };
    94 
    96 
    99 template <typename Tile_, int Threads>
    102  typedef Tile_ Tile;
    103 
    105  static int const kThreads = Threads;
    106 
    107  // Static assertions
    108  static_assert(Tile::kW >= kThreads,
    109  "This specialization assumes there are more threads than the contiguous dimension "
    110  "of the tile.");
    111 
    113  "Tiling undefined if elements not divisible by threads.");
    114 
    115  static_assert(!(Tile::kW % kThreads),
    116  "The contiguous size of the tile must be divisible by the number of threads.");
    117 
    120 
    123 
    125  typedef Shape<1, Tile::kH, Tile::kW / kThreads> Iterations;
    126 
    129 };
    130 
    132 
    134 template <typename Tile_, int Threads>
    137  typedef Tile_ Tile;
    138 
    140  static int const kThreads = Threads;
    141 
    143  static int const kWarpSize = 32;
    144 
    146  static int const kWarpCount = kThreads / kWarpSize;
    147 
    148  // Static assertions
    150  "Tiling undefined if elements not divisible by threads.");
    151 
    152  static_assert(!(kThreads % kWarpSize), "Number of threads must be divisible by the warp size.");
    153 
    154  static_assert(!(Tile::kW % kWarpSize), "Contiguous dimension must be divisible by the warp size");
    155 
    157  static int const kWarpsStrided = __NV_STD_MIN(kWarpCount, Tile::kH);
    158 
    161 
    164 
    167 
    169  typedef Shape<1, Tile::kH / Delta::kH, Tile::kW / ThreadShape::kW> Iterations;
    170 
    172  struct ThreadOffset {
    176  int tid = threadIdx.x;
    177  int warp = (tid / kWarpSize);
    178  int lane = (tid % kWarpSize);
    179 
    180  static int const kWarpSpanContiguous = kWarpSize * Iterations::kW;
    181 
    182  int warp_w = (warp % kWarpsContiguous);
    183  int warp_h = (warp / kWarpsContiguous);
    184 
    185  return make_Coord(0, warp_h, lane + kWarpSpanContiguous * warp_w, 0);
    186  }
    187  };
    188 };
    189 
    191 
    193 template <typename Tile_, int Threads>
    196  typedef Tile_ Tile;
    197 
    199  static int const kThreads = Threads;
    200 
    202  static int const kWarpSize = 32;
    203 
    205  static int const kWarpCount = kThreads / kWarpSize;
    206 
    207  // Static assertions
    209  "Tiling undefined if elements not divisible by threads.");
    210 
    214  typedef typename platform::conditional <
    215  Tile::kW<kWarpSize,
    217  typename platform::conditional<!(Tile::kW % kWarpSize),
    220  type Traits;
    221 
    223  typedef typename Traits::Delta Delta;
    224 
    227  typedef Delta ImmediateOffsetStrides;
    228 
    230  typedef typename Traits::Iterations Iterations;
    231 
    233  typedef typename Traits::ThreadOffset ThreadOffset;
    234 };
    235 
    237 
    238 } // namespace cutlass
    Shape< 1, Tile::kH/Delta::kH, Tile::kW/ThreadShape::kW > Iterations
    Number of iterations.
    Definition: tile_traits_standard.h:169
    +
    Definition: convert.h:33
    +
    Shape< 1, Tile::kH/ThreadShape::kH, 1, 1 > Iterations
    Number of iterations.
    Definition: tile_traits_standard.h:89
    +
    static int const kWarpCount
    Number of participating warps.
    Definition: tile_traits_standard.h:205
    +
    Definition: tile_traits_standard.h:100
    +
    Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
    +
    static int const kWarpsStrided
    Warps strip-mined across strided dimension.
    Definition: tile_traits_standard.h:157
    +
    static int const kThreads
    Number of participating threads.
    Definition: tile_traits_standard.h:105
    +
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 1-element coordinate.
    Definition: coord.h:241
    +
    Computes the thread offset in (H, W) based on thread ID.
    Definition: tile_traits_standard.h:172
    +
    static int const kThreads
    Number of participating threads.
    Definition: tile_traits_standard.h:72
    +
    Chooses &#39;best&#39; shape to enable warp raking along contiguous dimension if possible.
    Definition: tile_traits_standard.h:194
    +
    Tile_ Tile
    Shape of tile.
    Definition: tile_traits_standard.h:137
    +
    static int const kWarpsContiguous
    Warps stripmined contiguous dimension.
    Definition: tile_traits_standard.h:160
    +
    CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
    Computes the logical coordinate from thread shape.
    Definition: tile_traits_standard.h:42
    +
    Shape< 1, kWarpsStrided, kWarpSize > Delta
    The same warp rakes along the contiguous dimension.
    Definition: tile_traits_standard.h:166
    +
    CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
    Basic thread offset function computed from a thread shape.
    Definition: tile_traits_standard.h:175
    +
    Basic thread offset function computed from a thread shape.
    Definition: tile_traits_standard.h:39
    +
    static int const kH
    The height of the cube.
    Definition: shape.h:68
    +
    static int const kThreads
    Number of participating threads.
    Definition: tile_traits_standard.h:140
    +
    Shape< 1, ThreadShape::kH, 1, 1 > Delta
    Delta along each dimension.
    Definition: tile_traits_standard.h:86
    +
    Shape< 1, kThreads/Tile::kW, Tile::kW, 1 > ThreadShape
    Shape of threads.
    Definition: tile_traits_standard.h:76
    +
    static int const kWarpSize
    Hard-coded warp size.
    Definition: tile_traits_standard.h:143
    +
    #define __NV_STD_MIN(a, b)
    Select minimum(a, b)
    Definition: platform.h:160
    +
    Tile_ Tile
    Shape of tile.
    Definition: tile_traits_standard.h:196
    +
    Tile_ Tile
    Shape of tile.
    Definition: tile_traits_standard.h:69
    +
    static int const kWarpCount
    Number of participating warps.
    Definition: tile_traits_standard.h:146
    +
    Shape< 1, kWarpsStrided, kWarpsContiguous *kWarpSize > ThreadShape
    Arrangement of threads.
    Definition: tile_traits_standard.h:163
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    Definition: tile_traits_standard.h:67
    +
    std::conditional (true specialization)
    Definition: platform.h:343
    +
    #define static_assert(__e, __m)
    Definition: platform.h:145
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    + +
    TiledThreadOffset< ThreadShape > ThreadOffset
    Computes the initial offset.
    Definition: tile_traits_standard.h:92
    +
    Tile_ Tile
    Shape of tile.
    Definition: tile_traits_standard.h:102
    +
    static int const kW
    The width of the cube.
    Definition: shape.h:70
    +
    Tiling in which warps rake across the contiguous dimension.
    Definition: tile_traits_standard.h:135
    +
    static int const kWarpSize
    Hard-coded warp size.
    Definition: tile_traits_standard.h:202
    +
    Shape< 1, 1, kThreads > Delta
    Delta between each thread's access.
    Definition: tile_traits_standard.h:122
    +
    Shape< 1, 1, kThreads > ThreadShape
    Thread shape.
    Definition: tile_traits_standard.h:110
    +
    Compute derived counts of a Layout Concept based class.
    Definition: shape.h:79
    +
    TiledThreadOffset< ThreadShape > ThreadOffset
    Computes the initial offset.
    Definition: tile_traits_standard.h:128
    +
    static int const kThreads
    Number of participating threads.
    Definition: tile_traits_standard.h:199
    +
    Shape< 1, Tile::kH, Tile::kW/kThreads > Iterations
    Number of iterations.
    Definition: tile_traits_standard.h:125
    +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1Vector-members.html b/docs/generated-html/unioncutlass_1_1Vector-members.html new file mode 100644 index 00000000..f581f8db --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1Vector-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Vector< Scalar_, kLanes_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1Vector.html b/docs/generated-html/unioncutlass_1_1Vector.html new file mode 100644 index 00000000..f8e02771 --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1Vector.html @@ -0,0 +1,314 @@ + + + + + + + +Cutlass: cutlass::Vector< Scalar_, kLanes_ > Union Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Vector< Scalar_, kLanes_ > Union Template Reference
    +
    +
    + +

    #include <vector.h>

    + + + + + + + + + + + + + + +

    +Public Types

    enum  { kLanes = kLanes_ + }
     The number of elements in the vector. More...
     
    enum  { kVectorSize = kLanes * (int)sizeof(Scalar) + }
     The size of the vector. More...
     
    enum  { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 }
     The number of registers needed to store the vector. More...
     
    typedef Scalar_ Scalar
     The scalar type. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE Scalar const & operator[] (uint32_t i) const
     Accessor to the ith lane. More...
     
    CUTLASS_DEVICE Scalar & operator[] (uint32_t i)
     Accessor to the ith lane. More...
     
    + + + + + + + + + + +

    +Public Attributes

    AlignedStruct< kVectorSize > aligned_
     The aligned storage to make sure we have good alignment. More...
     
    Scalar scalars [kLanes]
     The associated array of scalars. More...
     
    uint32_t registers [kRegisters]
     The data in registers. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    typedef Scalar_ cutlass::Vector< Scalar_, kLanes_ >::Scalar
    +
    + +
    +
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    kLanes 
    + +
    +
    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    kVectorSize 
    + +
    +
    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    anonymous enum
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ operator[]() [1/2]

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Scalar const& cutlass::Vector< Scalar_, kLanes_ >::operator[] (uint32_t i) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [2/2]

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Scalar& cutlass::Vector< Scalar_, kLanes_ >::operator[] (uint32_t i)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ aligned_

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    AlignedStruct<kVectorSize> cutlass::Vector< Scalar_, kLanes_ >::aligned_
    +
    + +
    +
    + +

    ◆ registers

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    uint32_t cutlass::Vector< Scalar_, kLanes_ >::registers[kRegisters]
    +
    + +
    +
    + +

    ◆ scalars

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    Scalar cutlass::Vector< Scalar_, kLanes_ >::scalars[kLanes]
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4-members.html b/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4-members.html new file mode 100644 index 00000000..26516dab --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Vector< half, kLanes_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html b/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html new file mode 100644 index 00000000..80d3d9ee --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html @@ -0,0 +1,314 @@ + + + + + + + +Cutlass: cutlass::Vector< half, kLanes_ > Union Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Vector< half, kLanes_ > Union Template Reference
    +
    +
    + +

    #include <vector.h>

    + + + + + + + + + + + + + + +

    +Public Types

    enum  { kLanes = kLanes_ + }
     The number of elements in the vector. More...
     
    enum  { kVectorSize = kLanes * (int)sizeof(Scalar) + }
     The size of the vector. More...
     
    enum  { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 }
     The number of registers needed to store the vector. More...
     
    typedef half Scalar
     The scalar type. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE Scalar const & operator[] (uint32_t i) const
     Accessor to the ith lane. More...
     
    CUTLASS_DEVICE Scalar & operator[] (uint32_t i)
     Accessor to the ith lane. More...
     
    + + + + + + + + + + +

    +Public Attributes

    AlignedStruct< kVectorSize > aligned_
     The aligned storage to make sure we have good alignment. More...
     
    uint16_t scalars [kLanes]
     The associated array of scalars. More...
     
    uint32_t registers [kRegisters]
     The data in registers. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    typedef half cutlass::Vector< half, kLanes_ >::Scalar
    +
    + +
    +
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    kLanes 
    + +
    +
    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    kVectorSize 
    + +
    +
    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    anonymous enum
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ operator[]() [1/2]

    + +
    +
    +
    +template<int kLanes_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Scalar const& cutlass::Vector< half, kLanes_ >::operator[] (uint32_t i) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [2/2]

    + +
    +
    +
    +template<int kLanes_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Scalar& cutlass::Vector< half, kLanes_ >::operator[] (uint32_t i)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ aligned_

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    AlignedStruct<kVectorSize> cutlass::Vector< half, kLanes_ >::aligned_
    +
    + +
    +
    + +

    ◆ registers

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    uint32_t cutlass::Vector< half, kLanes_ >::registers[kRegisters]
    +
    + +
    +
    + +

    ◆ scalars

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    uint16_t cutlass::Vector< half, kLanes_ >::scalars[kLanes]
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage-members.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage-members.html new file mode 100644 index 00000000..5b998e32 --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html new file mode 100644 index 00000000..1a79c8cf --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html @@ -0,0 +1,139 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage Union Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage Union Reference
    +
    +
    + +

    The shared memory storage to exchange data. +

    + +

    #include <gemm_epilogue_traits.h>

    + + + + + + +

    +Public Attributes

    SharedStoreIteratorD::SharedStorage store
     
    SharedLoadIteratorD::SharedStorage load
     
    +

    Member Data Documentation

    + +

    ◆ load

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    SharedLoadIteratorD::SharedStorage cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage::load
    +
    + +
    +
    + +

    ◆ store

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    SharedStoreIteratorD::SharedStorage cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage::store
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage-members.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage-members.html new file mode 100644 index 00000000..be28d80e --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html new file mode 100644 index 00000000..c182796b --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html @@ -0,0 +1,139 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage Union Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage Union Reference
    +
    +
    + +

    The storage in shared memory. +

    + +

    #include <gemm_traits.h>

    + + + + + + +

    +Public Attributes

    MainLoopSharedStorage main_loop
     
    Epilogue::SharedStorage epilogue
     
    +

    Member Data Documentation

    + +

    ◆ epilogue

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    Epilogue::SharedStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage::epilogue
    +
    + +
    +
    + +

    ◆ main_loop

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    MainLoopSharedStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage::main_loop
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage-members.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage-members.html new file mode 100644 index 00000000..b675d5ab --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html new file mode 100644 index 00000000..97b2113b --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html @@ -0,0 +1,140 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ > Union Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ > Union Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    + + + + + + +

    +Public Attributes

    GlobalLoadStream_::SharedStorage global
     
    SharedLoadStream_::SharedStorage shared
     
    +

    Member Data Documentation

    + +

    ◆ global

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    +
    +template<typename GlobalLoadStream_, typename SharedLoadStream_>
    + + + + +
    GlobalLoadStream_::SharedStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >::global
    +
    + +
    +
    + +

    ◆ shared

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    +
    +template<typename GlobalLoadStream_, typename SharedLoadStream_>
    + + + + +
    SharedLoadStream_::SharedStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >::shared
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage-members.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage-members.html new file mode 100644 index 00000000..02b68012 --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html new file mode 100644 index 00000000..77fcb123 --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html @@ -0,0 +1,139 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage Union Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage Union Reference
    +
    +
    + +

    The storage in shared memory needed by that stream. +

    + +

    #include <gemm_global_stream.h>

    + + + + + + +

    +Public Attributes

    LoadIterator::SharedStorage load_iterator
     
    SharedStoreStorage store_iterator
     
    +

    Member Data Documentation

    + +

    ◆ load_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    LoadIterator::SharedStorage cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage::load_iterator
    +
    + +
    +
    + +

    ◆ store_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    SharedStoreStorage cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage::store_iterator
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/vector_8h.html b/docs/generated-html/vector_8h.html new file mode 100644 index 00000000..a3e0c090 --- /dev/null +++ b/docs/generated-html/vector_8h.html @@ -0,0 +1,165 @@ + + + + + + + +Cutlass: vector.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    vector.h File Reference
    +
    +
    + +

    Defines a 1D vector of elements held in the registers of each thread. +More...

    +
    #include <cuda_fp16.h>
    +#include <cutlass/util/platform.h>
    +
    +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::AlignedStruct< kAlignment_ >
     
    union  cutlass::Vector< Scalar_, kLanes_ >
     
    union  cutlass::Vector< half, kLanes_ >
     
    struct  cutlass::Vectorize< Element_, kLanes_ >
     
    struct  cutlass::Vectorize< Element_, 1 >
     
    struct  cutlass::Extent< T >
     Returns the extent of a scalar or vector. More...
     
    struct  cutlass::Extent< Vector< T, Lanes > >
     Returns the number of lanes of a vector if need be. More...
     
    struct  cutlass::Extent< Vector< T, Lanes > const >
     Returns the number of lanes of a vector if need be. More...
     
    struct  cutlass::VectorTraits< T >
     Traits describing properties of vectors and scalar-as-vectors. More...
     
    struct  cutlass::VectorTraits< Vector< T, Lanes > >
     Partial specialization for actual cutlass::Vector. More...
     
    struct  cutlass::VectorTraits< Vector< T, Lanes > const >
     Partial specialization for actual cutlass::Vector. More...
     
    + + + +

    +Namespaces

     cutlass
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    template<>
    struct cutlass::__align__ (1) AlignedStruct< 1 >
     
    template<>
    struct cutlass::__align__ (2) AlignedStruct< 2 >
     
    template<>
    struct cutlass::__align__ (4) AlignedStruct< 4 >
     
    template<>
    struct cutlass::__align__ (8) AlignedStruct< 8 >
     
    template<>
    struct cutlass::__align__ (16) AlignedStruct< 16 >
     
    template<>
    struct cutlass::__align__ (32) AlignedStruct< 32 >
     
    template<>
    struct cutlass::__align__ (64) AlignedStruct< 64 >
     
    template<typename Scalar_ >
    CUTLASS_DEVICE void cutlass::make_zero (Scalar_ &x)
     
    template<typename Scalar_ , int kLanes_>
    CUTLASS_DEVICE void cutlass::make_zero (Vector< Scalar_, kLanes_ > &vec)
     
    +
    + + + + diff --git a/docs/generated-html/vector_8h_source.html b/docs/generated-html/vector_8h_source.html new file mode 100644 index 00000000..73582385 --- /dev/null +++ b/docs/generated-html/vector_8h_source.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: vector.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    vector.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
    31 #include <cuda_fp16.h>
    32 #endif
    33 
    34 #include <cutlass/util/platform.h>
    35 
    36 namespace cutlass {
    37 
    39 
    40 template <size_t kAlignment_>
    41 struct AlignedStruct {};
    42 
    43 template <>
    44 struct __align__(1) AlignedStruct<1>{};
    45 template <>
    46 struct __align__(2) AlignedStruct<2>{};
    47 template <>
    48 struct __align__(4) AlignedStruct<4>{};
    49 template <>
    50 struct __align__(8) AlignedStruct<8>{};
    51 template <>
    52 struct __align__(16) AlignedStruct<16>{};
    53 template <>
    54 struct __align__(32) AlignedStruct<32>{};
    55 template <>
    56 struct __align__(64) AlignedStruct<64>{};
    57 
    59 
    60 template <typename Scalar_, int kLanes_>
    61 union Vector {
    63  typedef Scalar_ Scalar;
    64 
    66  enum { kLanes = kLanes_ };
    68  enum { kVectorSize = kLanes * (int)sizeof(Scalar) };
    70  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
    71 
    72  // Make sure that the vector type makes sense.
    73  static_assert(kVectorSize <= 16, "Vector type is too large");
    74 
    80  uint32_t registers[kRegisters];
    81 
    83  CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const { return scalars[i]; }
    85  CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return scalars[i]; }
    86 };
    87 
    89 
    90 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
    91 
    92 template <int kLanes_>
    93 union Vector<half, kLanes_> {
    95  typedef half Scalar;
    96 
    98  enum { kLanes = kLanes_ };
    100  enum { kVectorSize = kLanes * (int)sizeof(Scalar) };
    102  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
    103 
    104  // Make sure that the vector type makes sense.
    105  static_assert(kVectorSize <= size_t(16), "Vector type is too large");
    106 
    110  uint16_t scalars[kLanes];
    112  uint32_t registers[kRegisters];
    113 
    115  CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const {
    116  return reinterpret_cast<Scalar const&>(scalars[i]);
    117  }
    119  CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return reinterpret_cast<Scalar&>(scalars[i]); }
    120 };
    121 
    122 #endif
    123 
    125 
    126 template <typename Scalar_>
    127 CUTLASS_DEVICE void make_zero(Scalar_& x) {
    128  x = Scalar_(0);
    129 }
    130 
    132 
    133 template <typename Element_, int kLanes_ = 1>
    134 struct Vectorize {
    136 };
    137 
    139 
    140 template <typename Element_>
    141 struct Vectorize<Element_, 1> {
    142  typedef Element_ Type;
    143 };
    144 
    146 
    147 template <typename Scalar_, int kLanes_>
    148 CUTLASS_DEVICE void make_zero(Vector<Scalar_, kLanes_>& vec) {
    149  for (int i = 0; i < Vector<Scalar_, kLanes_>::kRegisters; ++i) {
    150  vec.registers[i] = 0;
    151  }
    152 }
    153 
    155 //
    156 // cutlass::Extent similar to std::extent but applicable to CUTLASS types
    157 //
    158 
    160 template <typename T>
    161 struct Extent {
    162  static size_t const kValue = 1;
    163 };
    164 
    166 template <typename T, int Lanes>
    167 struct Extent<Vector<T, Lanes> > {
    168  static size_t const kValue = Lanes;
    169 };
    170 
    172 template <typename T, int Lanes>
    173 struct Extent<Vector<T, Lanes> const> {
    174  static size_t const kValue = Lanes;
    175 };
    176 
    178 
    180 template <typename T>
    181 struct VectorTraits {
    183  typedef T Scalar;
    184 
    186  static int const kLanes = 1;
    187 
    189  static bool const IsVector = false;
    190 
    193 };
    194 
    196 template <typename T, int Lanes>
    197 struct VectorTraits<Vector<T, Lanes> > {
    199  typedef T Scalar;
    200 
    202  static int const kLanes = Lanes;
    203 
    205  static bool const IsVector = true;
    206 
    209 };
    210 
    212 template <typename T, int Lanes>
    213 struct VectorTraits<Vector<T, Lanes> const> {
    215  typedef T Scalar;
    216 
    218  static int const kLanes = Lanes;
    219 
    221  static bool const IsVector = true;
    222 
    225 };
    226 
    228 
    229 } // namespace cutlass
    Element_ Type
    Definition: vector.h:142
    +
    Definition: convert.h:33
    +
    Definition: vector.h:134
    +
    CUTLASS_DEVICE void make_zero(Scalar_ &x)
    Definition: vector.h:127
    +
    Definition: vector.h:41
    +
    T Scalar
    Scalar type.
    Definition: vector.h:183
    +
    struct __align__(1) AlignedStruct< 1 >
    Definition: vector.h:44
    +
    C++ features that may be otherwise unimplemented for CUDA device functions.
    +
    Scalar_ Scalar
    The scalar type.
    Definition: vector.h:63
    +
    Definition: vector.h:66
    +
    half Scalar
    The scalar type.
    Definition: vector.h:95
    +
    uint32_t registers[kRegisters]
    The data in registers.
    Definition: vector.h:80
    +
    Vector< T, 1 > Vector
    Type that is always a vector.
    Definition: vector.h:192
    +
    CUTLASS_DEVICE Scalar & operator[](uint32_t i)
    Accessor to the ith lane.
    Definition: vector.h:119
    +
    CUTLASS_DEVICE Scalar & operator[](uint32_t i)
    Accessor to the ith lane.
    Definition: vector.h:85
    +
    Traits describing properties of vectors and scalar-as-vectors.
    Definition: vector.h:181
    +
    #define static_assert(__e, __m)
    Definition: platform.h:145
    +
    Definition: vector.h:61
    +
    static bool const IsVector
    True if the type is actually a cutlass::Vector, otherwise false.
    Definition: vector.h:189
    +
    Scalar scalars[kLanes]
    The associated array of scalars.
    Definition: vector.h:78
    +
    Vector< T, Lanes > Vector
    Type that is always a Vector.
    Definition: vector.h:224
    +
    Definition: vector.h:68
    +
    static int const kLanes
    Number of lanes of vector.
    Definition: vector.h:186
    +
    CUTLASS_DEVICE Scalar const & operator[](uint32_t i) const
    Accessor to the ith lane.
    Definition: vector.h:115
    +
    T Scalar
    Scalar type.
    Definition: vector.h:215
    +
    Vector< Element_, kLanes_ > Type
    Definition: vector.h:135
    +
    T Scalar
    Scalar type.
    Definition: vector.h:199
    +
    static size_t const kValue
    Definition: vector.h:162
    +
    AlignedStruct< kVectorSize > aligned_
    The aligned storage to make sure we have good alignment.
    Definition: vector.h:73
    +
    AlignedStruct< kVectorSize > aligned_
    The aligned storage to make sure we have good alignment.
    Definition: vector.h:105
    +
    Vector< T, Lanes > Vector
    Type that is always a Vector.
    Definition: vector.h:208
    +
    CUTLASS_DEVICE Scalar const & operator[](uint32_t i) const
    Accessor to the ith lane.
    Definition: vector.h:83
    +
    Returns the extent of a scalar or vector.
    Definition: vector.h:161
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__epilogue__traits_8h.html b/docs/generated-html/wmma__gemm__epilogue__traits_8h.html new file mode 100644 index 00000000..31a79594 --- /dev/null +++ b/docs/generated-html/wmma__gemm__epilogue__traits_8h.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: wmma_gemm_epilogue_traits.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_epilogue_traits.h File Reference
    +
    +
    + +

    Defines structural properties of WMMA GEMM's epilogue phase. +More...

    + +

    Go to the source code of this file.

    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__epilogue__traits_8h_source.html b/docs/generated-html/wmma__gemm__epilogue__traits_8h_source.html new file mode 100644 index 00000000..92d9abc1 --- /dev/null +++ b/docs/generated-html/wmma__gemm__epilogue__traits_8h_source.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: wmma_gemm_epilogue_traits.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_epilogue_traits.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/wmma_matrix.h>
    31 #ifdef CUTLASS_USE_WMMA_API
    32 
    33 #include <cutlass/convert.h>
    34 #include <cutlass/coord.h>
    40 #include <cutlass/reshape_tile.h>
    41 #include <cutlass/tile_iterator.h>
    42 
    43 namespace cutlass {
    44 namespace gemm {
    45 
    47 
    48 template <typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    49 struct WmmaGemmEpilogueTraitsHelper {
    51  typedef typename EpilogueFunctor_::Scalar Scalar;
    53  typedef typename GemmConfig_::OutputTile OutputTile;
    54 
    56  static int const kWmmasPerH =
    57  GemmConfig_::AccumulatorsPerWarp::kH / GemmConfig_::InstructionShape::kH;
    59  typedef Shape<1, 1, kWmmasPerH> Iterations;
    60  // The iteration strides in the H/W dimension.
    61  typedef Shape<0, 0, 0> Delta;
    63  typedef EpilogueFunctor_ Functor;
    64 
    66  typedef WmmaGemmSharedStoreTileDTraits<
    67  // The output layout.
    69  // The pointer is float.
    70  typename Functor::Scalar,
    71  // The output tile size.
    72  typename GemmConfig_::OutputTile,
    73  // The number of warps.
    74  typename GemmConfig_::Warps,
    75  // The shape of the instruction.
    76  typename GemmConfig_::InstructionShape>
    77  SharedStoreTileTraits;
    78 
    79  typedef WmmaMatrix<GemmOperand::kC,
    81  Scalar,
    82  typename GemmConfig_::InstructionShape>
    83  WmmaMatrix;
    84 
    86  typedef TileStoreIterator<SharedStoreTileTraits,
    87  typename SharedStoreTileTraits::Scalar,
    90  Index_,
    91  WmmaMatrix,
    93  SharedStoreIteratorD;
    94 
    96  typedef Copy<typename SharedStoreIteratorD::Fragment> SharedStoreTransformerD;
    97 
    99  typedef WmmaGemmSharedLoadTileDTraits<
    100  // The pointer.
    101  typename Functor::Scalar,
    102  // The tile size.
    103  typename SharedStoreIteratorD::Tile,
    104  // The number of threads.
    105  Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>,
    106  // The number of scalars per LDS.
    107  GemmConfig_::kScalarsPerLdsD>
    108  SharedLoadTileTraits;
    109 
    111  typedef TileLoadIterator<SharedLoadTileTraits,
    112  typename SharedLoadTileTraits::Scalar,
    115  SharedLoadIteratorD;
    116 
    118  typedef WmmaGemmGlobalIteratorCdTraits<
    119  // The pointer is float const.
    120  typename GemmConfig_::ScalarC const,
    121  // The tile has size (N / Iterations)xM in GEMM's terminology.
    122  Shape<1,
    123  GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount,
    124  GemmConfig_::OutputTile::kW>,
    125  // The threads are distributed as warps x 32 (the traits may reorganize).
    126  Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>,
    127  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    128  GemmConfig_::kScalarsPerLdgC>
    129  GlobalLoadTileTraits;
    130 
    132  typedef WmmaGemmGlobalIteratorCd<GlobalLoadTileTraits, Index_> GlobalLoadIteratorC;
    134  typedef Copy<typename GlobalLoadIteratorC::Fragment> GlobalTransformerC;
    135 
    137  typedef WmmaGemmGlobalIteratorCdTraits<
    138  // The pointer is float.
    139  typename GemmConfig_::ScalarD,
    140  // The tile has size (N / Iterations)xM in GEMM's terminology.
    141  Shape<1,
    142  GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount,
    143  GemmConfig_::OutputTile::kW>,
    144  // The threads are distributed as warps x 32 (the traits may reorganize).
    145  Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>,
    146  // The number of scalars per STG (STG.32 or STG.128, etc).
    147  GemmConfig_::kScalarsPerStgD>
    148  GlobalStoreTileTraits;
    149 
    151  typedef WmmaGemmGlobalIteratorCd<GlobalStoreTileTraits, Index_> GlobalStoreIteratorD;
    153  typedef Copy<typename GlobalStoreIteratorD::Fragment> GlobalTransformerD;
    154 };
    155 
    157 
    158 } // namespace gemm
    159 } // namespace cutlass
    160 
    161 #endif // defined CUTLASS_USE_WMMA_API
    Abstractions for loading and storing matrices using the CUDA WMMA API.
    +
    Definition: load_store.h:42
    +
    Definition: convert.h:33
    +
    Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
    +
    Implements the BLAS linear scaling function alpha*AB + beta*C.
    +
    A Coord is a coordinate of arbitrary rank into a tensor or matrix.
    +
    Definition: tile_iterator.h:62
    +
    Definition: matrix_traits.h:43
    +
    Defines a type for restructuring a tile.
    +
    Definition: tile_iterator.h:67
    +
    Defines tile iterator traits for loading thread block-level tile from global memory.
    +
    static int const kCount
    The number of elements in the 4D space.
    Definition: shape.h:91
    +
    Definition: matrix_traits.h:36
    +
    Implements efficient loading of the thread block-level tile from global memory and storing to shared ...
    +
    Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEM...
    +
    Defines conversion operations among Fragments of different base type.
    +
    Defines iterator traits for efficiently loading and storing fragment to and from shared memory...
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__global__tile_8h.html b/docs/generated-html/wmma__gemm__global__tile_8h.html new file mode 100644 index 00000000..6c8b116f --- /dev/null +++ b/docs/generated-html/wmma__gemm__global__tile_8h.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: wmma_gemm_global_tile.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    wmma_gemm_global_tile.h File Reference
    +
    +
    + +

    Defines tile iterator traits for loading thread block-level tile from global memory. +More...

    + +

    Go to the source code of this file.

    + + + + + + + + + + + + +

    +Classes

    struct  cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >
     
    struct  cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    struct  cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
     
    struct  cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params
     The params. More...
     
    + + + + + +

    +Namespaces

     cutlass
     
     cutlass::gemm
     
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__global__tile_8h_source.html b/docs/generated-html/wmma__gemm__global__tile_8h_source.html new file mode 100644 index 00000000..4e58863d --- /dev/null +++ b/docs/generated-html/wmma__gemm__global__tile_8h_source.html @@ -0,0 +1,142 @@ + + + + + + + +Cutlass: wmma_gemm_global_tile.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_global_tile.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    31 
    32 namespace cutlass {
    33 namespace gemm {
    34 
    36 
    37 template <typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    38 struct WmmaGemmGlobalIteratorCdTraits : public GemmGlobalTileTraits<GemmOperand::kC,
    39  MatrixLayout::kColumnMajor,
    40  Scalar_,
    41  Tile_,
    42  Threads_,
    43  kAccessSize_> {
    47  Scalar_,
    48  Tile_,
    49  Threads_,
    50  kAccessSize_>
    52 
    55 
    57  struct ThreadOffset {
    59  Coord<4> operator()() const {
    60  int thread_offset_h = threadIdx.x / Base::Threads::kW;
    61  int thread_offset_w = threadIdx.x % Base::Threads::kW * Base::ThreadsDelta::kW;
    62 
    63  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
    64  }
    65  };
    66 };
    67 
    69 
    70 template <typename TileTraits_, typename Index_ = int>
    71 struct WmmaGemmGlobalIteratorCd : public TileIteratorBase<TileTraits_,
    72  typename TileTraits_::Scalar,
    73  IteratorAdvance::kH,
    74  MemorySpace::kGlobal,
    75  Index_> {
    79  typedef TileTraits_ Traits;
    81  typedef TileIteratorBase<Traits,
    82  typename TileTraits_::Scalar,
    85  Index_>
    90  static MatrixLayout::Kind const kLayout = TileTraits_::kLayout;
    91 
    93  typedef typename TileTraits_::Scalar Scalar;
    95  typedef typename TileTraits_::Pointer Pointer;
    97  typedef typename TileTraits_::Threads Threads;
    99  typedef Index_ Index;
    101  typedef typename TileTraits_::ThreadOffset ThreadOffset;
    102 
    104  struct Params {
    115 
    118  Pointer pointer, Index ld, Index n, Index epilogue_stride_w, Index epilogue_delta_w) {
    119  // The pointer.
    120  this->pointer = pointer;
    121  // Setup the base stride. One "group of threads" per column.
    122  stride_h = ld;
    123  // Each thread outputs 1 column per iteration.
    124  inc_h = ld * TileTraits_::Threads::kH;
    125  inc_advance = inc_h + epilogue_stride_w;
    126 
    127  predicate_offset = n;
    128  predicate_inc_h = TileTraits_::Threads::kH;
    129  predicate_inc_advance = predicate_inc_h + epilogue_delta_w;
    130 
    131  // It worked.
    132  return 0;
    133  }
    134  };
    135 
    137 
    139 
    141  CUTLASS_DEVICE WmmaGemmGlobalIteratorCd() {}
    142 
    144  CUTLASS_DEVICE WmmaGemmGlobalIteratorCd(Params const& params,
    145  const Coord<3>& bounds,
    146  const Coord<3>& block,
    147  int const pointer_offset = 0,
    148  int const pred_offset = 0,
    149  ThreadOffset thread_offset_func = ThreadOffset())
    150 
    151  : params(params) {
    152  thread_offset = thread_offset_func();
    153  // Each warp works on a different column of the tile.
    154  int const h = thread_offset[1] + block[1];
    155  // Each lane writes a different element.
    156  int const w = thread_offset[2] + block[2];
    157  // Setup the pointer.
    158  this->params.pointer += ((h * params.stride_h + w) + pointer_offset);
    159 
    160  // Prepare the vector of predicates.
    161  for (int i = 0; i < Base::Iterations::kW; ++i) {
    162  predicates.set(i, w + i * Base::Delta::kW < bounds[2]);
    163  }
    164  this->params.predicate_offset -= (h + pred_offset);
    165  }
    166 
    168  CUTLASS_DEVICE void inc_c() {}
    170  CUTLASS_DEVICE void inc_w() {}
    172  CUTLASS_DEVICE void inc_h() {
    175  }
    177  CUTLASS_DEVICE void inc_d() {}
    179  CUTLASS_DEVICE void inc_advance() {
    182  }
    183 
    185  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const {
    186  return predicates.at(w) && params.predicate_offset > 0;
    187  }
    188 
    191  Pointer data() { return params.pointer; }
    192 
    194  Pointer const data() const { return params.pointer; }
    195 
    198 };
    199 
    201 
    202 } // namespace gemm
    203 } // namespace cutlass
    TileTraits_::Threads Threads
    The threads.
    Definition: wmma_gemm_global_tile.h:97
    +
    Definition: convert.h:33
    +
    Defines iterators for efficiently loading and storing to global memory.
    +
    Definition: gemm_global_tile.h:70
    +
    CUTLASS_HOST_DEVICE bool at(int idx) const
    Accesses a bit within the predicate vector.
    Definition: predicate_vector.h:356
    +
    CUTLASS_DEVICE void inc_d()
    Increment the pointer in the D dimension.
    Definition: wmma_gemm_global_tile.h:177
    +
    Definition: load_store.h:43
    +
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 1-element coordinate.
    Definition: coord.h:241
    +
    Index stride_h
    The stride in the H dimension to setup the thread in the block.
    Definition: wmma_gemm_global_tile.h:108
    +
    CUTLASS_DEVICE void inc_w()
    Increment the pointer in the W dimension.
    Definition: wmma_gemm_global_tile.h:170
    +
    Index_ Index
    The index.
    Definition: wmma_gemm_global_tile.h:99
    +
    TileTraits_::Scalar Scalar
    The scalar.
    Definition: wmma_gemm_global_tile.h:93
    +
    Definition: tile_iterator.h:62
    +
    Definition: matrix_traits.h:43
    +
    Params params
    Definition: wmma_gemm_global_tile.h:136
    +
    Index predicate_inc_h
    The strides to increment the predicate offset.
    Definition: wmma_gemm_global_tile.h:114
    +
    Pointer pointer
    The pointer.
    Definition: wmma_gemm_global_tile.h:106
    +
    CUTLASS_HOST_DEVICE Pointer const data() const
    Definition: wmma_gemm_global_tile.h:194
    +
    CUTLASS_DEVICE void inc_advance()
    Increment the pointer to move to the next iteration.
    Definition: wmma_gemm_global_tile.h:179
    +
    The params.
    Definition: wmma_gemm_global_tile.h:104
    +
    Index inc_h
    The strides to increment the pointer.
    Definition: wmma_gemm_global_tile.h:110
    +
    TileIteratorBase< Traits, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > Base
    The base class.
    Definition: wmma_gemm_global_tile.h:86
    +
    CUTLASS_DEVICE WmmaGemmGlobalIteratorCd()
    Ctor.
    Definition: wmma_gemm_global_tile.h:141
    +
    Index predicate_offset
    The column offset to compute the predicate for the columns.
    Definition: wmma_gemm_global_tile.h:112
    +
    CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
    Definition: wmma_gemm_global_tile.h:59
    +
    Definition: wmma_gemm_global_tile.h:71
    +
    Index predicate_inc_advance
    Definition: wmma_gemm_global_tile.h:114
    +
    TileTraits_::Pointer Pointer
    The pointer.
    Definition: wmma_gemm_global_tile.h:95
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > Delta
    Override the strides in each dimension between different loads/stores.
    Definition: wmma_gemm_global_tile.h:54
    + +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    Coord< 4 > thread_offset
    Definition: wmma_gemm_global_tile.h:138
    +
    Index inc_advance
    Definition: wmma_gemm_global_tile.h:110
    +
    static MatrixLayout::Kind const kLayout
    The layout.
    Definition: wmma_gemm_global_tile.h:90
    + +
    Definition: wmma_gemm_global_tile.h:38
    +
    Iterator for accessing a stripmined tile in memory.
    Definition: tile_iterator.h:102
    +
    TileTraits_::ThreadOffset ThreadOffset
    The thread offset functor.
    Definition: wmma_gemm_global_tile.h:101
    +
    Definition: matrix_traits.h:36
    +
    CUTLASS_HOST_DEVICE Pointer data()
    Returns the raw pointer.
    Definition: wmma_gemm_global_tile.h:191
    +
    static int const kW
    The width of the cube.
    Definition: shape.h:70
    +
    CUTLASS_HOST_DEVICE void set(int idx, bool value=true)
    Set a bit within the predicate vector.
    Definition: predicate_vector.h:364
    +
    Kind
    Definition: matrix_traits.h:36
    +
    GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > Base
    The base class.
    Definition: wmma_gemm_global_tile.h:51
    +
    CUTLASS_DEVICE WmmaGemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int const pointer_offset=0, int const pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())
    Ctor.
    Definition: wmma_gemm_global_tile.h:144
    +
    WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > This_
    This class.
    Definition: wmma_gemm_global_tile.h:77
    +
    cutlass::PredicateVector< Base::Iterations::kW > predicates
    The predicates for the row.
    Definition: wmma_gemm_global_tile.h:197
    +
    Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > ImmediateOffsetStrides
    Override the strides in each dimension between different loads/stores.
    Definition: wmma_gemm_global_tile.h:88
    +
    Computes the thread offset in (H, W) based on thread ID.
    Definition: wmma_gemm_global_tile.h:57
    +
    CUTLASS_DEVICE void inc_c()
    Increment the pointer in the C dimension.
    Definition: wmma_gemm_global_tile.h:168
    +
    CUTLASS_DEVICE void inc_h()
    Increment the pointer in the H dimension.
    Definition: wmma_gemm_global_tile.h:172
    +
    TileTraits_ Traits
    The traits.
    Definition: wmma_gemm_global_tile.h:79
    +
    CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
    Test the predicate.
    Definition: wmma_gemm_global_tile.h:185
    +
    CUTLASS_HOST_DEVICE int initialize(Pointer pointer, Index ld, Index n, Index epilogue_stride_w, Index epilogue_delta_w)
    Setup the params.
    Definition: wmma_gemm_global_tile.h:117
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__multiply__add_8h.html b/docs/generated-html/wmma__gemm__multiply__add_8h.html new file mode 100644 index 00000000..b81b0189 --- /dev/null +++ b/docs/generated-html/wmma__gemm__multiply__add_8h.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: wmma_gemm_multiply_add.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_multiply_add.h File Reference
    +
    +
    + +

    Implements warp-level matrix multiply-accumulate operation using CUDA WMMA API. +More...

    + +

    Go to the source code of this file.

    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__multiply__add_8h_source.html b/docs/generated-html/wmma__gemm__multiply__add_8h_source.html new file mode 100644 index 00000000..399aa11f --- /dev/null +++ b/docs/generated-html/wmma__gemm__multiply__add_8h_source.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: wmma_gemm_multiply_add.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_multiply_add.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/wmma_matrix.h>
    31 #ifdef CUTLASS_USE_WMMA_API
    32 #include <cutlass/fragment.h>
    33 
    34 namespace cutlass {
    35 namespace gemm {
    36 
    38 
    39 template <MatrixLayout::Kind kLayoutA_,
    40  typename ScalarA_,
    41  MatrixLayout::Kind kLayoutB_,
    42  typename ScalarB_,
    43  MatrixLayout::Kind kLayoutC_,
    44  typename ScalarC_,
    45  typename AccumulatorsPerWarp_,
    46  typename InstructionShape_>
    47 struct WmmaGemmMultiplyAdd {
    49  typedef InstructionShape_ InstructionShape;
    51  typedef Shape<1, InstructionShape_::kH, InstructionShape_::kW> ThreadsPerWarp;
    53  typedef AccumulatorsPerWarp_ AccumulatorsPerWarp;
    55  typedef ScalarA_ ScalarA;
    57  typedef ScalarB_ ScalarB;
    59  typedef ScalarC_ ScalarC;
    62 
    64  typedef WmmaMatrix<GemmOperand::kA, kLayoutA_, ScalarA, InstructionShape> ElementA;
    66  typedef Fragment<ElementA, Iterations::kW> FragmentA;
    67 
    69  typedef WmmaMatrix<GemmOperand::kB, kLayoutB_, ScalarB, InstructionShape> ElementB;
    71  typedef Fragment<ElementB, Iterations::kH> FragmentB;
    72 
    74  typedef WmmaMatrix<GemmOperand::kC, kLayoutC_, ScalarC, InstructionShape> ElementC;
    76  typedef Fragment<ElementC, Iterations::kH * Iterations::kW> Accumulators;
    77 
    79  CUTLASS_DEVICE WmmaGemmMultiplyAdd() {}
    80 
    82  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
    83  FragmentB const& b,
    84  Accumulators const& c,
    85  Accumulators& d) {
    86  for (int j = 0; j < Iterations::kH; ++j) {
    87  for (int i = 0; i < Iterations::kW; ++i) {
    88  // The input elements.
    89  ElementA const& elt_a = a[i];
    90  ElementB const& elt_b = b[j];
    91  ElementC const& elt_c = c[j * Iterations::kW + i];
    92 
    93  // The output element.
    94  ElementC& elt_d = d[j * Iterations::kW + i];
    95 
    96  // The wmma instruction.
    97  nvcuda::wmma::mma_sync(elt_d, elt_a, elt_b, elt_c);
    98  }
    99  }
    100  }
    101 };
    102 
    104 
    105 } // namespace gemm
    106 } // namespace cutlass
    107 
    108 #endif // defined CUTLASS_USE_WMMA_API
    Abstractions for loading and storing matrices using the CUDA WMMA API.
    +
    Definition: convert.h:33
    +
    Shape< A_::kD/B_::kD, A_::kH/B_::kH, A_::kW/B_::kW, A_::kC/B_::kC > Shape
    Definition: shape.h:126
    +
    Kind
    Definition: matrix_traits.h:36
    +
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__shared__tile_8h.html b/docs/generated-html/wmma__gemm__shared__tile_8h.html new file mode 100644 index 00000000..e72f3a86 --- /dev/null +++ b/docs/generated-html/wmma__gemm__shared__tile_8h.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: wmma_gemm_shared_tile.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_shared_tile.h File Reference
    +
    +
    + +

    Defines iterator traits for efficiently loading and storing fragment to and from shared memory, specialized for WMMA GEMM. +More...

    + +

    Go to the source code of this file.

    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__shared__tile_8h_source.html b/docs/generated-html/wmma__gemm__shared__tile_8h_source.html new file mode 100644 index 00000000..fe565882 --- /dev/null +++ b/docs/generated-html/wmma__gemm__shared__tile_8h_source.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: wmma_gemm_shared_tile.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_shared_tile.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cutlass/wmma_matrix.h>
    32 #ifdef CUTLASS_USE_WMMA_API
    33 
    35 #include <cutlass/reshape_tile.h>
    36 
    37 namespace cutlass {
    38 namespace gemm {
    39 
    40 template <class>
    41 struct Debug {};
    42 
    44 
    45 template <MatrixLayout::Kind kLayout_,
    46  typename Scalar_,
    47  typename Tile_,
    48  typename Warps_,
    49  int kWarpStride_,
    50  typename Iterations_,
    51  typename Delta_,
    52  typename WmmaShape_>
    53 struct WmmaGemmSharedLoadTileATraits {
    55  static GemmOperand::Kind const kOperand = GemmOperand::kA;
    57  static MatrixLayout::Kind const kLayout = kLayout_;
    59  typedef Scalar_ Scalar;
    61  typedef Scalar const* Pointer;
    63  static int const kAccessSize = 1;
    65  typedef Tile_ Tile;
    67  typedef Warps_ Warps;
    69  static int const kWarpStride = kWarpStride_;
    71  typedef Iterations_ Iterations;
    73  typedef Delta_ Delta;
    75  typedef Delta_ ImmediateOffsetStrides;
    77  typedef WmmaShape_ WmmaShape;
    79  static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;
    81  struct ThreadOffset {
    83  Coord<4> operator()() const {
    84  // The warp id.
    85  int const warp = threadIdx.x / kWarpSize;
    86  // The offset.
    87  int const offset = warp % Warps::kW * kWarpStride;
    88  return make_Coord(0, 0, offset, 0);
    89  }
    90  };
    91 };
    92 
    94 
    95 template <MatrixLayout::Kind kLayout_,
    96  typename Scalar_,
    97  typename Tile_,
    98  typename Warps_,
    99  int kWarpStride_,
    100  typename Iterations_,
    101  typename Delta_,
    102  typename WmmaShape_>
    103 struct WmmaGemmSharedLoadTileBTraits {
    105  static GemmOperand::Kind const kOperand = GemmOperand::kB;
    107  static MatrixLayout::Kind const kLayout = kLayout_;
    109  typedef Scalar_ Scalar;
    111  typedef Scalar const* Pointer;
    113  static int const kAccessSize = 1;
    115  typedef Tile_ Tile;
    117  typedef Warps_ Warps;
    119  static int const kWarpStride = kWarpStride_;
    121  typedef Iterations_ Iterations;
    123  typedef Delta_ Delta;
    125  typedef Delta_ ImmediateOffsetStrides;
    127  typedef WmmaShape_ WmmaShape;
    129  static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;
    131  struct ThreadOffset {
    133  Coord<4> operator()() const {
    134  // The warp id.
    135  int const warp = threadIdx.x / kWarpSize;
    136  // The offset.
    137  int const offset = warp / Warps::kW * kWarpStride;
    138  return make_Coord(0, 0, offset, 0);
    139  }
    140  };
    141 };
    142 
    144 
    145 template <MatrixLayout::Kind kLayout_,
    146  typename Scalar_,
    147  typename OutputTile_,
    148  typename Warps_,
    149  typename WmmaShape_,
    150  int kSkew_ = 0>
    151 struct WmmaGemmSharedStoreTileDTraits {
    153  static GemmOperand::Kind const kOperand = GemmOperand::kC;
    155  static MatrixLayout::Kind const kLayout = kLayout_;
    157  typedef Scalar_ Scalar;
    158  // The access size
    159  static int const kAccessSize = 1;
    161  typedef Scalar* Pointer;
    163  typedef Warps_ Warps;
    165  typedef WmmaShape_ WmmaShape;
    167  static int const kSkew = kSkew_;
    169  static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;
    171  typedef Shape<1, Warps_::kH * WmmaShape_::kH, OutputTile_::kW + kSkew_> Tile;
    173  typedef Shape<1, 1, OutputTile_::kW / Warps::kW / WmmaShape_::kW> Iterations;
    175  typedef Shape<0, 0, Warps::kW * WmmaShape_::kW, 0> Delta;
    177  typedef Shape<0, 0, Warps::kW * WmmaShape_::kW, 0> ImmediateOffsetStrides;
    178 
    180  struct ThreadOffset {
    182  Coord<4> operator()() const {
    183  // The warp id.
    184  int const warp = threadIdx.x / kWarpSize;
    185  // The starting column.
    186  int const h = warp / Warps::kW * WmmaShape::kH;
    187  // The w.
    188  int const w = warp % Warps::kW * WmmaShape::kW;
    189  // The offset.
    190  int const offset = h * Tile::kW + w;
    191  return make_Coord(0, 0, offset, 0);
    192  }
    193  };
    194 };
    195 
    197 
    198 template <typename Scalar_, typename Tile_, typename Threads_, int kScalarsPerLds_>
    199 struct WmmaGemmSharedLoadTileDTraits {
    201  typedef Scalar_ Scalar;
    203  typedef Scalar const* Pointer;
    205  static int const kAccessSize = kScalarsPerLds_;
    207  typedef typename ReshapeTile<Tile_, kScalarsPerLds_>::Tile Tile;
    209  typedef typename ReshapeThreads<Tile, Threads_>::Threads Threads;
    211  typedef Shape<1, Tile::kW * Tile::kC, Tile::kC> ThreadsStrides;
    213  static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;
    214 
    216  typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kScalarsPerLds_> Delta;
    218  typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kScalarsPerLds_>
    219  ImmediateOffsetStrides;
    221  typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kScalarsPerLds_>
    222  Iterations;
    223 
    225  struct ThreadOffset {
    227  Coord<4> operator()() const {
    228  // The offset.
    230  return make_Coord(0, 0, offset, 0);
    231  }
    232  };
    233 };
    234 
    236 
    237 } // namespace gemm
    238 } // namespace cutlass
    239 
    240 #endif // defined CUTLASS_USE_WMMA_API
    static CUTLASS_DEVICE int get()
    Definition: shape.h:253
    +
    Abstractions for loading and storing matrices using the CUDA WMMA API.
    +
    Definition: load_store.h:42
    +
    Definition: convert.h:33
    +
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 1-element coordinate.
    Definition: coord.h:241
    +
    Definition: matrix_traits.h:43
    +
    Kind
    Definition: load_store.h:40
    +
    Defines a type for restructuring a tile.
    +
    Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
    +
    Definition: matrix_traits.h:43
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    Kind
    Definition: matrix_traits.h:36
    +
    Tile_ Tile
    Definition: reshape_tile.h:43
    +
    Kind
    Definition: matrix_traits.h:43
    +
    Definition: matrix_traits.h:43
    +
    Threads_ Threads
    Definition: gemm_global_tile.h:54
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__traits_8h.html b/docs/generated-html/wmma__gemm__traits_8h.html new file mode 100644 index 00000000..37eb3547 --- /dev/null +++ b/docs/generated-html/wmma__gemm__traits_8h.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: wmma_gemm_traits.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_traits.h File Reference
    +
    +
    + +

    Defines structural properties of GEMM targeting WMMA API in CUDA. +More...

    + +

    Go to the source code of this file.

    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__traits_8h_source.html b/docs/generated-html/wmma__gemm__traits_8h_source.html new file mode 100644 index 00000000..ad4cb28d --- /dev/null +++ b/docs/generated-html/wmma__gemm__traits_8h_source.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: wmma_gemm_traits.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_traits.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/wmma_matrix.h>
    31 #ifdef CUTLASS_USE_WMMA_API
    32 
    33 #include <cutlass/convert.h>
    34 #include <cutlass/gemm/gemm.h>
    43 
    44 namespace cutlass {
    45 namespace gemm {
    46 
    48 
    49 template <
    51  MatrixLayout::Kind kLayoutA_,
    53  MatrixLayout::Kind kLayoutB_,
    55  typename OutputTile_,
    57  typename ScalarC_,
    59  typename Accumulator_,
    61  typename AccumulatorsPerWarp_,
    63  typename InstructionShape_,
    65  int kScalarsPerLdgA_,
    67  int kScalarsPerLdgB_>
    68 struct WmmaGemmConfig : public GemmConfig<
    70  half,
    72  half,
    74  ScalarC_,
    76  ScalarC_,
    78  OutputTile_,
    80  WmmaGemmMultiplyAdd<kLayoutA_,
    81  half,
    82  kLayoutB_,
    83  half,
    84  MatrixLayout::kColumnMajor,
    85  Accumulator_,
    86  AccumulatorsPerWarp_,
    87  InstructionShape_>,
    89  kScalarsPerLdgA_,
    91  kScalarsPerLdgA_,
    93  8,
    95  kScalarsPerLdgB_,
    97  kScalarsPerLdgB_,
    99  8,
    101  16 / sizeof(ScalarC_),
    103  16 / sizeof(ScalarC_),
    105  16 / sizeof(ScalarC_),
    107  1> {};
    108 
    110 
    111 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
    112 struct WmmaGemmTileTraitsHelperA {};
    113 
    115 
    116 template <typename GemmConfig_>
    117 struct WmmaGemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_>
    118  : public GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> {
    120  typedef GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> Base;
    121 
    123  static int const kSkew = 16 / sizeof(typename Base::MultiplyAddScalar);
    125  typedef Shape<GemmConfig_::kStages,
    126  GemmConfig_::OutputTile::kD,
    127  GemmConfig_::OutputTile::kW + kSkew>
    128  Tile;
    129 
    131  typedef WmmaMatrix<GemmOperand::kA,
    133  typename Base::MultiplyAddScalar,
    134  typename GemmConfig_::InstructionShape>
    135  WmmaMatrix;
    136 
    138  typedef GemmSharedStoreTileAbTraits<
    139  // The pointer.
    140  typename Base::MultiplyAddScalar,
    141  // The tile has size KxM in GEMM's terminology.
    142  Tile,
    143  // The threads are distributed as warps x 32 (the traits may reorganize).
    144  typename Base::GlobalTileTraits::Threads,
    145  // The number of scalars per STS (STS.32 or STS.128, etc).
    146  GemmConfig_::kScalarsPerStsA>
    147  SharedStoreTileTraits;
    148 
    150  static int const kScalarsPerW = GemmConfig_::InstructionShape::kW * GemmConfig_::Warps::kW;
    152  static int const kScalarsPerIteration = Tile::kW * GemmConfig_::InstructionShape::kD;
    154  typedef WmmaGemmSharedLoadTileATraits<
    155  // The layout of the matrix.
    157  // The pointer.
    158  typename Base::MultiplyAddScalar,
    159  // The output tile size.
    160  Tile,
    161  // The number of warps.
    162  typename GemmConfig_::Warps,
    163  // The strides between warps.
    164  GemmConfig_::InstructionShape::kW,
    165  // The number of iterations to load the data.
    166  Shape<1, 1, GemmConfig_::OutputTile::kW / kScalarsPerW>,
    167  // The stride between iterations.
    168  Shape<kScalarsPerIteration, 0, kScalarsPerW, 0>,
    169  // The shape of the instruction.
    170  typename GemmConfig_::InstructionShape>
    171  SharedLoadTileTraits;
    172 };
    173 
    175 
    176 template <typename GemmConfig_>
    177 struct WmmaGemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_> {
    179  static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor;
    180 
    182  typedef typename GemmConfig_::ScalarA Scalar;
    184  typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar;
    185 
    187  typedef WmmaMatrix<GemmOperand::kA,
    189  MultiplyAddScalar,
    190  typename GemmConfig_::InstructionShape>
    191  WmmaMatrix;
    192 
    194  typedef GemmGlobalTileTraits<
    195  // That's A.
    197  // A is row-major.
    199  // The pointer is float const.
    200  Scalar const,
    201  // The tile has size KxM in GEMM's terminology.
    202  Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>,
    203  // The threads are distributed as warps x 32 (the traits may reorganize).
    204  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
    205  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    206  GemmConfig_::kScalarsPerLdgA>
    207  GlobalTileTraits;
    208 
    210  static int const kSkew = 16 / sizeof(MultiplyAddScalar);
    212  typedef Shape<GemmConfig_::kStages,
    213  GemmConfig_::OutputTile::kW,
    214  GemmConfig_::OutputTile::kD + kSkew>
    215  Tile;
    216 
    218  typedef GemmSharedStoreTileAbTraits<
    219  // The pointer.
    220  MultiplyAddScalar,
    221  // The tile has size KxM in GEMM's terminology.
    222  Tile,
    223  // The threads are distributed as warps x 32 (the traits may reorganize).
    224  typename GlobalTileTraits::Threads,
    225  // The number of scalars per STS (STS.32 or STS.128, etc).
    226  GemmConfig_::kScalarsPerStsA>
    227  SharedStoreTileTraits;
    228 
    230  static int const kScalarsPerW = GemmConfig_::InstructionShape::kW * GemmConfig_::Warps::kW;
    232  typedef WmmaGemmSharedLoadTileATraits<
    233  // The layout of the matrix.
    235  // The pointer.
    236  MultiplyAddScalar,
    237  // The tile in shared memory.
    238  Tile,
    239  // The number of warps.
    240  typename GemmConfig_::Warps,
    241  // The strides between warps.
    242  GemmConfig_::InstructionShape::kW * Tile::kW,
    243  // The number of iterations to load the data.
    244  Shape<1, 1, GemmConfig_::OutputTile::kW / kScalarsPerW>,
    245  // The stride between iterations.
    246  Shape<GemmConfig_::InstructionShape::kD, 0, kScalarsPerW * Tile::kW>,
    247  // The shape of the instruction.
    248  typename GemmConfig_::InstructionShape>
    249  SharedLoadTileTraits;
    250 };
    251 
    253 
    254 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
    255 struct WmmaGemmTileTraitsHelperB {};
    256 
    258 
    259 template <typename GemmConfig_>
    260 struct WmmaGemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_>
    261  : public GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> {
    263  typedef GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> Base;
    264 
    266  static int const kSkew = 16 / sizeof(typename Base::MultiplyAddScalar);
    268  typedef Shape<GemmConfig_::kStages,
    269  GemmConfig_::OutputTile::kD,
    270  GemmConfig_::OutputTile::kH + kSkew>
    271  Tile;
    272 
    274  typedef WmmaMatrix<GemmOperand::kB,
    276  typename Base::MultiplyAddScalar,
    277  typename GemmConfig_::InstructionShape>
    278  WmmaMatrix;
    279 
    281  typedef GemmSharedStoreTileAbTraits<
    282  // The pointer.
    283  typename Base::MultiplyAddScalar,
    284  // The tile has size KxM in GEMM's terminology.
    285  Tile,
    286  // The threads are distributed as warps x 32 (the traits may reorganize).
    287  typename Base::GlobalTileTraits::Threads,
    288  // The number of scalars per STS (STS.32 or STS.128, etc).
    289  GemmConfig_::kScalarsPerStsB>
    290  SharedStoreTileTraits;
    291 
    293  static int const kScalarsPerW = GemmConfig_::InstructionShape::kH * GemmConfig_::Warps::kH;
    295  static int const kScalarsPerIteration = Tile::kW * GemmConfig_::InstructionShape::kD;
    297  typedef WmmaGemmSharedLoadTileBTraits<
    298  // The layout of the matrix.
    300  // The pointer.
    301  typename Base::MultiplyAddScalar,
    302  // The output tile size.
    303  Tile,
    304  // The number of warps.
    305  typename GemmConfig_::Warps,
    306  // The strides between warps.
    307  GemmConfig_::InstructionShape::kH,
    308  // The number of iterations to load the data.
    309  Shape<1, 1, GemmConfig_::OutputTile::kH / kScalarsPerW>,
    310  // The stride between iterations.
    311  Shape<kScalarsPerIteration, 0, kScalarsPerW, 0>,
    312  // The shape of the instruction.
    313  typename GemmConfig_::InstructionShape>
    314  SharedLoadTileTraits;
    315 };
    316 
    318 
    319 template <typename GemmConfig_>
    320 struct WmmaGemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_> {
    322  static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor;
    323 
    325  typedef typename GemmConfig_::ScalarB Scalar;
    327  typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar;
    328 
    330  typedef WmmaMatrix<GemmOperand::kB,
    332  MultiplyAddScalar,
    333  typename GemmConfig_::InstructionShape>
    334  WmmaMatrix;
    335 
    337  typedef GemmGlobalTileTraits<
    338  // That's B.
    340  // B is column-major.
    342  // The pointer is float const.
    343  Scalar const,
    344  // The tile has size KxM in GEMM's terminology.
    345  Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>,
    346  // The threads are distributed as warps x 32 (the traits may reorganize).
    347  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
    348  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    349  GemmConfig_::kScalarsPerLdgB>
    350  GlobalTileTraits;
    351 
    353  static int const kSkew = 16 / sizeof(MultiplyAddScalar);
    355  typedef Shape<GemmConfig_::kStages,
    356  GemmConfig_::OutputTile::kH,
    357  GemmConfig_::OutputTile::kD + kSkew>
    358  Tile;
    359 
    361  typedef GemmSharedStoreTileAbTraits<
    362  // The pointer.
    363  MultiplyAddScalar,
    364  // The tile has size KxM in GEMM's terminology.
    365  Tile,
    366  // The threads are distributed as warps x 32 (the traits may reorganize).
    367  typename GlobalTileTraits::Threads,
    368  // The number of scalars per STS (STS.32 or STS.128, etc).
    369  GemmConfig_::kScalarsPerStsB>
    370  SharedStoreTileTraits;
    371 
    373  static int const kScalarsPerW = GemmConfig_::InstructionShape::kH * GemmConfig_::Warps::kH;
    375  typedef WmmaGemmSharedLoadTileBTraits<
    376  // The layout of the matrix.
    378  // The pointer.
    379  MultiplyAddScalar,
    380  // The tile in shared memory.
    381  Tile,
    382  // The number of warps.
    383  typename GemmConfig_::Warps,
    384  // The strides between warps.
    385  GemmConfig_::InstructionShape::kH * Tile::kW,
    386  // The number of iterations to load the data.
    387  Shape<1, 1, GemmConfig_::OutputTile::kH / kScalarsPerW>,
    388  // The stride between iterations.
    389  Shape<GemmConfig_::InstructionShape::kD, 0, kScalarsPerW * Tile::kW>,
    390  // The shape of the instruction.
    391  typename GemmConfig_::InstructionShape>
    392  SharedLoadTileTraits;
    393 };
    394 
    396 
    397 template <
    399  MatrixLayout::Kind kLayoutA_,
    401  MatrixLayout::Kind kLayoutB_,
    403  typename OutputTile_,
    405  typename ScalarC_,
    407  typename Accumulator_,
    409  typename EpilogueFunctor_,
    411  typename AccumulatorsPerWarp_,
    413  typename InstructionShape_,
    415  int kScalarsPerLdgA_,
    417  int kScalarsPerLdgB_,
    419  typename Index_>
    420 struct WmmaGemmTraitsHelper {
    422  typedef WmmaGemmConfig<kLayoutA_,
    423  kLayoutB_,
    424  OutputTile_,
    425  ScalarC_,
    426  Accumulator_,
    427  AccumulatorsPerWarp_,
    428  InstructionShape_,
    429  kScalarsPerLdgA_,
    430  kScalarsPerLdgB_>
    431  GemmConfig;
    432 
    434  typedef WmmaGemmTileTraitsHelperA<kLayoutA_, GemmConfig> GemmTileTraitsHelperA;
    436  typedef WmmaGemmTileTraitsHelperB<kLayoutB_, GemmConfig> GemmTileTraitsHelperB;
    437 
    439  typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperA::GlobalTileTraits, Index_>
    440  GlobalLoadIteratorA;
    442  typedef Copy<typename GlobalLoadIteratorA::Fragment> GlobalTransformerA;
    444  typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits,
    445  typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar,
    448  SharedStoreIteratorA;
    450  typedef GlobalLoadStream<GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA>
    451  GlobalLoadStreamA;
    452 
    454  typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperB::GlobalTileTraits, Index_>
    455  GlobalLoadIteratorB;
    456  // The default transformer for B.
    457  typedef Copy<typename GlobalLoadIteratorB::Fragment> GlobalTransformerB;
    459  typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits,
    460  typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar,
    463  SharedStoreIteratorB;
    465  typedef GlobalLoadStream<GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB>
    466  GlobalLoadStreamB;
    467 
    469  typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits,
    470  typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar,
    473  Index_,
    474  typename GemmTileTraitsHelperA::WmmaMatrix,
    476  SharedLoadIteratorA;
    478  typedef SharedLoadStream<SharedLoadIteratorA> SharedLoadStreamA;
    480  typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits,
    481  typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar,
    484  Index_,
    485  typename GemmTileTraitsHelperB::WmmaMatrix,
    487  SharedLoadIteratorB;
    489  typedef SharedLoadStream<SharedLoadIteratorB> SharedLoadStreamB;
    490 
    492  typedef typename GemmConfig::MultiplyAdd MultiplyAdd;
    494  typedef ClearAccumulators<typename MultiplyAdd::ScalarC> ClearAccumulators;
    495 
    497  typedef WmmaGemmEpilogueTraitsHelper<GemmConfig, EpilogueFunctor_, Index_> EpilogueTraitsHelper;
    499  typedef SimplifiedGemmEpilogueTraits<GemmConfig, EpilogueFunctor_, Index_, EpilogueTraitsHelper>
    500  GemmEpilogueTraits;
    502  typedef GemmEpilogue<GemmEpilogueTraits> Epilogue;
    503 };
    504 
    506 
    507 template <typename OutputTile_, typename DefaultShape_ = Shape<64, 32, 64> >
    508 struct WmmaGemmAccumulatorsPerWarp {
    509  typedef typename ShapeMin<OutputTile_, DefaultShape_>::Shape Shape;
    510 };
    511 
    513 
    514 template <
    516  MatrixLayout::Kind kLayoutA_,
    518  MatrixLayout::Kind kLayoutB_,
    520  typename OutputTile_ = Shape<64, 128, 128>,
    522  typename ScalarC_ = float,
    524  typename EpilogueFunctor_ = LinearScaling<ScalarC_>,
    526  typename Accumulator_ = ScalarC_,
    528  typename AccumulatorsPerWarp_ = typename WmmaGemmAccumulatorsPerWarp<OutputTile_>::Shape,
    530  typename InstructionShape_ = Shape<16, 16, 16>,
    532  int kScalarsPerLdgA_ = 8,
    534  int kScalarsPerLdgB_ = 8,
    536  typename Index_ = int,
    538  typename Helper_ = WmmaGemmTraitsHelper<kLayoutA_,
    539  kLayoutB_,
    540  OutputTile_,
    541  ScalarC_,
    542  Accumulator_,
    543  EpilogueFunctor_,
    544  AccumulatorsPerWarp_,
    545  InstructionShape_,
    546  kScalarsPerLdgA_,
    547  kScalarsPerLdgB_,
    548  Index_> >
    549 struct WmmaGemmTraits : public GemmTraits<
    550  // The config.
    551  typename Helper_::GemmConfig,
    552  // The stream to load A from global memory to shared memory.
    553  typename Helper_::GlobalLoadStreamA,
    554  // The stream to load B from global memory to shared memory.
    555  typename Helper_::GlobalLoadStreamB,
    556  // The stream to load A from shared memory.
    557  typename Helper_::SharedLoadStreamA,
    558  // The stream to load B from shared memory.
    559  typename Helper_::SharedLoadStreamB,
    560  // The epilogue.
    561  typename Helper_::Epilogue,
    562  // The block swizzle to reorganize the grid.
    563  IdentityBlockSwizzle,
    564  // The index.
    565  Index_,
    566  // The tool used to clear accumulators.
    567  typename Helper_::ClearAccumulators> {};
    568 
    570 
    571 } // namespace gemm
    572 } // namespace cutlass
    573 
    574 #endif // defined CUTLASS_USE_WMMA_API
    Abstractions for loading and storing matrices using the CUDA WMMA API.
    +
    MultiplyAdd_ MultiplyAdd
    The functor to do D = A*B + C.
    Definition: gemm_traits.h:93
    +
    Definition: load_store.h:42
    +
    Definition: convert.h:33
    +
    Defines iterators for efficiently loading and storing to global memory.
    +
    Defines structural properties of complete GEMM computation.
    +
    Defines structural properties of WMMA GEMM&#39;s epilogue phase.
    +
    Definition: tile_iterator.h:62
    +
    Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
    +
    Defines iterators for efficiently loading and storing tiles to and from shared memory.
    +
    Definition: matrix_traits.h:36
    +
    Definition: tile_iterator.h:67
    +
    Definition: matrix_traits.h:43
    +
    Defines tile iterator traits for loading thread block-level tile from global memory.
    +
    Definition: matrix_traits.h:36
    +
    Kind
    Definition: matrix_traits.h:36
    +
    Implements warp-level matrix multiply-accumulate operation using CUDA WMMA API.
    +
    Definition: matrix_traits.h:43
    +
    Implements a software-pipelined efficient GEMM.
    +
    Defines structural properties of the GEMM epilogue.
    +
    Shape<(A_::kD< B_::kD ? A_::kD :B_::kD),(A_::kH< B_::kH ? A_::kH :B_::kH),(A_::kW< B_::kW ? A_::kW :B_::kW),(A_::kC< B_::kC ? A_::kC :B_::kC)> Shape
    Definition: shape.h:148
    +
    Defines conversion operations among Fragments of different base type.
    +
    + + + + diff --git a/docs/generated-html/wmma__matrix_8h.html b/docs/generated-html/wmma__matrix_8h.html new file mode 100644 index 00000000..fd1ab8cf --- /dev/null +++ b/docs/generated-html/wmma__matrix_8h.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: wmma_matrix.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_matrix.h File Reference
    +
    +
    + +

    Abstractions for loading and storing matrices using the CUDA WMMA API. +More...

    + +

    Go to the source code of this file.

    +
    + + + + diff --git a/docs/generated-html/wmma__matrix_8h_source.html b/docs/generated-html/wmma__matrix_8h_source.html new file mode 100644 index 00000000..9e91b685 --- /dev/null +++ b/docs/generated-html/wmma__matrix_8h_source.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: wmma_matrix.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_matrix.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #if defined(__CUDACC__) && (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700)
    31 
    32 // Dependent header files should use the following macro to guard all code using
    33 // nvcuda::wmma:: to enable compilation for CUDA Compute Capabilities < sm_70.
    34 // Earlier shader models do not support Tensor Cores.
    35 #define CUTLASS_USE_WMMA_API
    36 
    37 #include "stdio.h"
    38 
    39 #include <crt/mma.h>
    40 #include <cutlass/fragment.h>
    41 #include <cutlass/load_store.h>
    42 #include <cutlass/matrix_traits.h>
    43 #include <cutlass/shape.h>
    44 #include <cutlass/vector.h>
    45 
    46 namespace cutlass {
    47 
    49 
    51 template <MatrixLayout::Kind kLayout_>
    52 struct WmmaLayout {
    53  typedef nvcuda::wmma::col_major Layout;
    54 };
    55 
    57 template <>
    58 struct WmmaLayout<MatrixLayout::kRowMajor> {
    59  typedef nvcuda::wmma::row_major Layout;
    60 };
    61 
    63 
    65 template <GemmOperand::Kind kOperand_,
    66  MatrixLayout::Kind kLayout_,
    67  typename Scalar_,
    68  typename WmmaShape_>
    69 struct WmmaMatrix {};
    70 
    72 
    74 template <MatrixLayout::Kind kLayout_, typename Scalar_, typename WmmaShape_>
    75 struct WmmaMatrix<GemmOperand::kA, kLayout_, Scalar_, WmmaShape_>
    76  : public nvcuda::wmma::fragment<
    78  nvcuda::wmma::matrix_a,
    80  WmmaShape_::kW,
    81  WmmaShape_::kH,
    82  WmmaShape_::kD,
    84  Scalar_,
    86  typename WmmaLayout<kLayout_>::Layout> {
    88  typedef WmmaMatrix<GemmOperand::kA, kLayout_, Scalar_, WmmaShape_> This_;
    89 
    91  CUTLASS_DEVICE This_& operator=(Scalar_ const& x) {
    92  nvcuda::wmma::fill_fragment(*this, x);
    93  return *this;
    94  }
    95 
    97  CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) {
    98  nvcuda::wmma::load_matrix_sync(*this, pointer, stride);
    99  }
    100 
    102  CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const {
    103  nvcuda::wmma::store_matrix_sync(pointer, *this, stride);
    104  }
    105 };
    106 
    108 
    110 template <MatrixLayout::Kind kLayout_, typename Scalar_, typename WmmaShape_>
    111 struct WmmaMatrix<GemmOperand::kB, kLayout_, Scalar_, WmmaShape_>
    112  : public nvcuda::wmma::fragment<
    114  nvcuda::wmma::matrix_b,
    116  WmmaShape_::kW,
    117  WmmaShape_::kH,
    118  WmmaShape_::kD,
    120  Scalar_,
    122  typename WmmaLayout<kLayout_>::Layout> {
    124  typedef WmmaMatrix<GemmOperand::kB, kLayout_, Scalar_, WmmaShape_> This_;
    125 
    127  CUTLASS_DEVICE This_& operator=(Scalar_ const& x) {
    128  nvcuda::wmma::fill_fragment(*this, x);
    129  return *this;
    130  }
    131 
    133  CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) {
    134  nvcuda::wmma::load_matrix_sync(*this, pointer, stride);
    135  }
    136 
    138  CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const {
    139  nvcuda::wmma::store_matrix_sync(pointer, *this, stride);
    140  }
    141 };
    142 
    144 
    146 template <MatrixLayout::Kind kLayout_, typename Scalar_, typename WmmaShape_>
    147 struct WmmaMatrix<GemmOperand::kC, kLayout_, Scalar_, WmmaShape_>
    148  : public nvcuda::wmma::fragment<
    150  nvcuda::wmma::accumulator,
    152  WmmaShape_::kW,
    153  WmmaShape_::kH,
    154  WmmaShape_::kD,
    156  Scalar_> {
    158  typedef WmmaMatrix<GemmOperand::kC, kLayout_, Scalar_, WmmaShape_> This_;
    160  static MatrixLayout::Kind const kLayout = kLayout_;
    161 
    163  CUTLASS_DEVICE This_& operator=(Scalar_ const& x) {
    164  nvcuda::wmma::fill_fragment(*this, x);
    165  return *this;
    166  }
    167 
    169  CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) {
    170  bool const kIsRowMajor = kLayout == MatrixLayout::kRowMajor;
    171  nvcuda::wmma::load_matrix_sync(
    172  *this,
    173  pointer,
    174  stride,
    175  kIsRowMajor ? nvcuda::wmma::mem_row_major : nvcuda::wmma::mem_col_major);
    176  }
    177 
    179  CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const {
    180  bool const kIsRowMajor = kLayout == MatrixLayout::kRowMajor;
    181  nvcuda::wmma::store_matrix_sync(
    182  pointer,
    183  *this,
    184  stride,
    185  kIsRowMajor ? nvcuda::wmma::mem_row_major : nvcuda::wmma::mem_col_major);
    186  }
    187 };
    188 
    190 
    191 } // namespace cutlass
    192 
    193 #endif // defined CUTLASS_USE_WMMA_API
    Definition: convert.h:33
    +
    Definition: matrix_traits.h:36
    +
    Defines abstractions for efficiently loading and storing vectors to memory.
    +
    Defines a 1D vector of elements held in the registers of each thread.
    +
    Kind
    Definition: matrix_traits.h:36
    +
    Kind
    Definition: matrix_traits.h:43
    +
    Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
    +
    Defines properties of matrices used to denote layout and operands to GEMM kernels.
    +
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
    +
    + + + + diff --git a/media/cutlass-performance-plot.png b/media/cutlass-performance-plot.png deleted file mode 100644 index 96171ed0..00000000 Binary files a/media/cutlass-performance-plot.png and /dev/null differ diff --git a/media/fig-09-complete-hierarchy.png b/media/fig-09-complete-hierarchy.png deleted file mode 100644 index 0419523b..00000000 Binary files a/media/fig-09-complete-hierarchy.png and /dev/null differ diff --git a/media/images/cutlass-performance-plot.png b/media/images/cutlass-performance-plot.png new file mode 100644 index 00000000..f61c2e50 Binary files /dev/null and b/media/images/cutlass-performance-plot.png differ diff --git a/media/images/gemm-hierarchy-with-epilogue-no-labels.png b/media/images/gemm-hierarchy-with-epilogue-no-labels.png new file mode 100644 index 00000000..a53b99fe Binary files /dev/null and b/media/images/gemm-hierarchy-with-epilogue-no-labels.png differ diff --git a/media/images/gemm-hierarchy-with-epilogue.png b/media/images/gemm-hierarchy-with-epilogue.png new file mode 100644 index 00000000..dde58aa8 Binary files /dev/null and b/media/images/gemm-hierarchy-with-epilogue.png differ diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 00000000..31f3594f --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1,26 @@ +# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this list of +# conditions and the following disclaimer in the documentation and/or other materials +# provided with the distribution. 
+# * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include_directories("external/googletest/googletest/include") +add_subdirectory(external/googletest/googletest) +add_subdirectory(test) +add_subdirectory(nvrtc) diff --git a/tools/external/googletest/.gitignore b/tools/external/googletest/.gitignore new file mode 100644 index 00000000..4cea432f --- /dev/null +++ b/tools/external/googletest/.gitignore @@ -0,0 +1,24 @@ +# Ignore CI build directory +build/ +xcuserdata +cmake-build-debug/ +.idea/ +bazel-bin +bazel-genfiles +bazel-googletest +bazel-out +bazel-testlogs +# python +*.pyc + +# Visual Studio files +*.sdf +*.opensdf +*.VC.opendb +*.suo +*.user +_ReSharper.Caches/ +Win32-Debug/ +Win32-Release/ +x64-Debug/ +x64-Release/ diff --git a/tools/external/googletest/.travis.yml b/tools/external/googletest/.travis.yml new file mode 100644 index 00000000..4afad4ae --- /dev/null +++ b/tools/external/googletest/.travis.yml @@ -0,0 +1,74 @@ +# Build matrix / environment variable are explained on: +# http://about.travis-ci.org/docs/user/build-configuration/ +# This file can 
be validated on: +# http://lint.travis-ci.org/ + +sudo: false +language: cpp + +# Define the matrix explicitly, manually expanding the combinations of (os, compiler, env). +# It is more tedious, but grants us far more flexibility. +matrix: + include: + - os: linux + compiler: gcc + sudo: true + cache: + install: ./ci/install-linux.sh && ./ci/log-config.sh + script: ./ci/build-linux-bazel.sh + - os: linux + compiler: clang + sudo: true + cache: + install: ./ci/install-linux.sh && ./ci/log-config.sh + script: ./ci/build-linux-bazel.sh + - os: linux + compiler: gcc + env: BUILD_TYPE=Debug VERBOSE=1 + - os: linux + compiler: gcc + env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 + - os: linux + compiler: clang + env: BUILD_TYPE=Debug VERBOSE=1 + - os: linux + compiler: clang + env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 + - os: osx + compiler: gcc + env: BUILD_TYPE=Debug VERBOSE=1 + - os: osx + compiler: gcc + env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 + - os: osx + compiler: clang + env: BUILD_TYPE=Debug VERBOSE=1 + - os: osx + compiler: clang + env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 + +# These are the install and build (script) phases for the most common entries in the matrix. They could be included +# in each entry in the matrix, but that is just repetitive. +install: + - ./ci/install-${TRAVIS_OS_NAME}.sh + - . ./ci/env-${TRAVIS_OS_NAME}.sh + - ./ci/log-config.sh + +script: ./ci/travis.sh + +# For sudo=false builds this section installs the necessary dependencies. 
+addons: + apt: + # List of whitelisted in travis packages for ubuntu-precise can be found here: + # https://github.com/travis-ci/apt-package-whitelist/blob/master/ubuntu-precise + # List of whitelisted in travis apt-sources: + # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json + sources: + - ubuntu-toolchain-r-test + - llvm-toolchain-precise-3.7 + packages: + - g++-4.9 + - clang-3.7 + +notifications: + email: false diff --git a/tools/external/googletest/BUILD.bazel b/tools/external/googletest/BUILD.bazel new file mode 100644 index 00000000..a4423740 --- /dev/null +++ b/tools/external/googletest/BUILD.bazel @@ -0,0 +1,147 @@ +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Author: misterg@google.com (Gennadiy Civil) +# +# Bazel Build for Google C++ Testing Framework(Google Test) + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +config_setting( + name = "win", + values = {"cpu": "x64_windows_msvc"}, +) + +# Google Test including Google Mock +cc_library( + name = "gtest", + srcs = glob( + include = [ + "googletest/src/*.cc", + "googletest/src/*.h", + "googletest/include/gtest/**/*.h", + "googlemock/src/*.cc", + "googlemock/include/gmock/**/*.h", + ], + exclude = [ + "googletest/src/gtest-all.cc", + "googletest/src/gtest_main.cc", + "googlemock/src/gmock-all.cc", + "googlemock/src/gmock_main.cc", + ], + ), + hdrs =glob([ + "googletest/include/gtest/*.h", + "googlemock/include/gmock/*.h", + ]), + copts = select( + { + ":win": [], + "//conditions:default": ["-pthread"], + }, + ), + includes = [ + "googlemock", + "googlemock/include", + "googletest", + "googletest/include", + ], + linkopts = select({ + ":win": [], + "//conditions:default": [ + "-pthread", + ], + }), +) + +cc_library( + name = "gtest_main", + srcs = [ + "googlemock/src/gmock_main.cc", + ], + deps = ["//:gtest"], +) + +# The following rules build samples of how to use gTest. 
+cc_library( + name = "gtest_sample_lib", + srcs = [ + "googletest/samples/sample1.cc", + "googletest/samples/sample2.cc", + "googletest/samples/sample4.cc", + ], + hdrs = [ + "googletest/samples/prime_tables.h", + "googletest/samples/sample1.h", + "googletest/samples/sample2.h", + "googletest/samples/sample3-inl.h", + "googletest/samples/sample4.h", + ], +) + +cc_test( + name = "gtest_samples", + size = "small", + #All Samples except: + #sample9 ( main ) + #sample10 (main and takes a command line option and needs to be separate) + srcs = [ + "googletest/samples/sample1_unittest.cc", + "googletest/samples/sample2_unittest.cc", + "googletest/samples/sample3_unittest.cc", + "googletest/samples/sample4_unittest.cc", + "googletest/samples/sample5_unittest.cc", + "googletest/samples/sample6_unittest.cc", + "googletest/samples/sample7_unittest.cc", + "googletest/samples/sample8_unittest.cc", + ], + deps = [ + "gtest_sample_lib", + ":gtest_main", + ], +) + +cc_test( + name = "sample9_unittest", + size = "small", + srcs = ["googletest/samples/sample9_unittest.cc"], + deps = [":gtest"], +) + +cc_test( + name = "sample10_unittest", + size = "small", + srcs = ["googletest/samples/sample10_unittest.cc"], + deps = [ + ":gtest", + ], +) diff --git a/tools/external/googletest/CMakeLists.txt b/tools/external/googletest/CMakeLists.txt new file mode 100644 index 00000000..f8a97faa --- /dev/null +++ b/tools/external/googletest/CMakeLists.txt @@ -0,0 +1,33 @@ +cmake_minimum_required(VERSION 2.6.4) + +if (POLICY CMP0048) + cmake_policy(SET CMP0048 NEW) +endif (POLICY CMP0048) + +project( googletest-distribution ) + +enable_testing() + +include(CMakeDependentOption) +if (CMAKE_VERSION VERSION_LESS 2.8.5) + set(CMAKE_INSTALL_BINDIR "bin" CACHE STRING "User executables (bin)") + set(CMAKE_INSTALL_LIBDIR "lib${LIB_SUFFIX}" CACHE STRING "Object code libraries (lib)") + set(CMAKE_INSTALL_INCLUDEDIR "include" CACHE STRING "C header files (include)") + mark_as_advanced(CMAKE_INSTALL_BINDIR 
CMAKE_INSTALL_LIBDIR CMAKE_INSTALL_INCLUDEDIR) +else() + include(GNUInstallDirs) +endif() + +option(BUILD_GTEST "Builds the googletest subproject" OFF) + +#Note that googlemock target already builds googletest +option(BUILD_GMOCK "Builds the googlemock subproject" ON) + +cmake_dependent_option(INSTALL_GTEST "Enable installation of googletest. (Projects embedding googletest may want to turn this OFF.)" ON "BUILD_GTEST OR BUILD_GMOCK" OFF) +cmake_dependent_option(INSTALL_GMOCK "Enable installation of googlemock. (Projects embedding googlemock may want to turn this OFF.)" ON "BUILD_GMOCK" OFF) + +if(BUILD_GMOCK) + add_subdirectory( googlemock ) +elseif(BUILD_GTEST) + add_subdirectory( googletest ) +endif() diff --git a/tools/external/googletest/README.md b/tools/external/googletest/README.md new file mode 100644 index 00000000..f858833d --- /dev/null +++ b/tools/external/googletest/README.md @@ -0,0 +1,148 @@ + +# Google Test # + +[![Build Status](https://travis-ci.org/google/googletest.svg?branch=master)](https://travis-ci.org/google/googletest) +[![Build status](https://ci.appveyor.com/api/projects/status/4o38plt0xbo1ubc8/branch/master?svg=true)](https://ci.appveyor.com/project/GoogleTestAppVeyor/googletest/branch/master) + +Welcome to **Google Test**, Google's C++ test framework! + +This repository is a merger of the formerly separate GoogleTest and +GoogleMock projects. These were so closely related that it makes sense to +maintain and release them together. + +Please see the project page above for more information as well as the +mailing list for questions, discussions, and development. There is +also an IRC channel on [OFTC](https://webchat.oftc.net/) (irc.oftc.net) #gtest available. Please +join us! + +Getting started information for **Google Test** is available in the +[Google Test Primer](googletest/docs/Primer.md) documentation. + +**Google Mock** is an extension to Google Test for writing and using C++ mock +classes. 
See the separate [Google Mock documentation](googlemock/README.md). + +More detailed documentation for googletest (including build instructions) are +in its interior [googletest/README.md](googletest/README.md) file. + +## Features ## + + * An [xUnit](https://en.wikipedia.org/wiki/XUnit) test framework. + * Test discovery. + * A rich set of assertions. + * User-defined assertions. + * Death tests. + * Fatal and non-fatal failures. + * Value-parameterized tests. + * Type-parameterized tests. + * Various options for running the tests. + * XML test report generation. + +## Platforms ## + +Google test has been used on a variety of platforms: + + * Linux + * Mac OS X + * Windows + * Cygwin + * MinGW + * Windows Mobile + * Symbian + +## Who Is Using Google Test? ## + +In addition to many internal projects at Google, Google Test is also used by +the following notable projects: + + * The [Chromium projects](http://www.chromium.org/) (behind the Chrome + browser and Chrome OS). + * The [LLVM](http://llvm.org/) compiler. + * [Protocol Buffers](https://github.com/google/protobuf), Google's data + interchange format. + * The [OpenCV](http://opencv.org/) computer vision library. + * [tiny-dnn](https://github.com/tiny-dnn/tiny-dnn): header only, dependency-free deep learning framework in C++11. + +## Related Open Source Projects ## + +[GTest Runner](https://github.com/nholthaus/gtest-runner) is a Qt5 based automated test-runner and Graphical User Interface with powerful features for Windows and Linux platforms. + +[Google Test UI](https://github.com/ospector/gtest-gbar) is test runner that runs +your test binary, allows you to track its progress via a progress bar, and +displays a list of test failures. Clicking on one shows failure text. Google +Test UI is written in C#. 
+ +[GTest TAP Listener](https://github.com/kinow/gtest-tap-listener) is an event +listener for Google Test that implements the +[TAP protocol](https://en.wikipedia.org/wiki/Test_Anything_Protocol) for test +result output. If your test runner understands TAP, you may find it useful. + +[gtest-parallel](https://github.com/google/gtest-parallel) is a test runner that +runs tests from your binary in parallel to provide significant speed-up. + +## Requirements ## + +Google Test is designed to have fairly minimal requirements to build +and use with your projects, but there are some. Currently, we support +Linux, Windows, Mac OS X, and Cygwin. We will also make our best +effort to support other platforms (e.g. Solaris, AIX, and z/OS). +However, since core members of the Google Test project have no access +to these platforms, Google Test may have outstanding issues there. If +you notice any problems on your platform, please notify +[googletestframework@googlegroups.com](https://groups.google.com/forum/#!forum/googletestframework). Patches for fixing them are +even more welcome! + +### Linux Requirements ### + +These are the base requirements to build and use Google Test from a source +package (as described below): + + * GNU-compatible Make or gmake + * POSIX-standard shell + * POSIX(-2) Regular Expressions (regex.h) + * A C++98-standard-compliant compiler + +### Windows Requirements ### + + * Microsoft Visual C++ 2010 or newer + +### Cygwin Requirements ### + + * Cygwin v1.5.25-14 or newer + +### Mac OS X Requirements ### + + * Mac OS X v10.4 Tiger or newer + * Xcode Developer Tools + +### Requirements for Contributors ### + +We welcome patches. 
If you plan to contribute a patch, you need to +build Google Test and its own tests from a git checkout (described +below), which has further requirements: + + * [Python](https://www.python.org/) v2.3 or newer (for running some of + the tests and re-generating certain source files from templates) + * [CMake](https://cmake.org/) v2.6.4 or newer + +## Regenerating Source Files ## + +Some of Google Test's source files are generated from templates (not +in the C++ sense) using a script. +For example, the +file include/gtest/internal/gtest-type-util.h.pump is used to generate +gtest-type-util.h in the same directory. + +You don't need to worry about regenerating the source files +unless you need to modify them. You would then modify the +corresponding `.pump` files and run the '[pump.py](googletest/scripts/pump.py)' +generator script. See the [Pump Manual](googletest/docs/PumpManual.md). + +### Contributing Code ### + +We welcome patches. Please read the +[Developer's Guide](googletest/docs/DevGuide.md) +for how you can contribute. In particular, make sure you have signed +the Contributor License Agreement, or we won't be able to accept the +patch. + +Happy testing! 
diff --git a/tools/external/googletest/WORKSPACE b/tools/external/googletest/WORKSPACE new file mode 100644 index 00000000..106b824e --- /dev/null +++ b/tools/external/googletest/WORKSPACE @@ -0,0 +1 @@ +workspace(name = "com_google_googletest") diff --git a/tools/external/googletest/appveyor.yml b/tools/external/googletest/appveyor.yml new file mode 100644 index 00000000..4e8d6f6e --- /dev/null +++ b/tools/external/googletest/appveyor.yml @@ -0,0 +1,96 @@ +version: '{build}' + +os: Visual Studio 2015 + +environment: + matrix: + - compiler: msvc-15-seh + generator: "Visual Studio 15 2017" + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + + - compiler: msvc-15-seh + generator: "Visual Studio 15 2017 Win64" + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + + - compiler: msvc-14-seh + generator: "Visual Studio 14 2015" + + - compiler: msvc-14-seh + generator: "Visual Studio 14 2015 Win64" + + - compiler: msvc-12-seh + generator: "Visual Studio 12 2013" + + - compiler: msvc-12-seh + generator: "Visual Studio 12 2013 Win64" + + - compiler: msvc-11-seh + generator: "Visual Studio 11 2012" + + - compiler: msvc-11-seh + generator: "Visual Studio 11 2012 Win64" + + - compiler: msvc-10-seh + generator: "Visual Studio 10 2010" + + - compiler: gcc-5.3.0-posix + generator: "MinGW Makefiles" + cxx_path: 'C:\mingw-w64\i686-5.3.0-posix-dwarf-rt_v4-rev0\mingw32\bin' + + - compiler: gcc-6.3.0-posix + generator: "MinGW Makefiles" + cxx_path: 'C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin' + +configuration: + - Debug + #- Release + +build: + verbosity: minimal + +install: +- ps: | + Write-Output "Compiler: $env:compiler" + Write-Output "Generator: $env:generator" + + # git bash conflicts with MinGW makefiles + if ($env:generator -eq "MinGW Makefiles") { + $env:path = $env:path.replace("C:\Program Files\Git\usr\bin;", "") + if ($env:cxx_path -ne "") { + $env:path += ";$env:cxx_path" + } + } + +build_script: +- ps: | + md _build -Force | Out-Null + cd _build + + $conf = 
if ($env:generator -eq "MinGW Makefiles") {"-DCMAKE_BUILD_TYPE=$env:configuration"} else {"-DCMAKE_CONFIGURATION_TYPES=Debug;Release"} + # Disable test for MinGW (gtest tests fail, gmock tests can not build) + $gtest_build_tests = if ($env:generator -eq "MinGW Makefiles") {"-Dgtest_build_tests=OFF"} else {"-Dgtest_build_tests=ON"} + $gmock_build_tests = if ($env:generator -eq "MinGW Makefiles") {"-Dgmock_build_tests=OFF"} else {"-Dgmock_build_tests=ON"} + & cmake -G "$env:generator" $conf -Dgtest_build_samples=ON $gtest_build_tests $gmock_build_tests .. + if ($LastExitCode -ne 0) { + throw "Exec: $ErrorMessage" + } + & cmake --build . --config $env:configuration + if ($LastExitCode -ne 0) { + throw "Exec: $ErrorMessage" + } + +test_script: +- ps: | + if ($env:generator -eq "MinGW Makefiles") { + return # No test available for MinGW + } + & ctest -C $env:configuration --timeout 300 --output-on-failure + if ($LastExitCode -ne 0) { + throw "Exec: $ErrorMessage" + } + +artifacts: + - path: '_build/CMakeFiles/*.log' + name: logs + - path: '_build/Testing/**/*.xml' + name: test_results diff --git a/tools/external/googletest/ci/build-linux-bazel.sh b/tools/external/googletest/ci/build-linux-bazel.sh new file mode 100755 index 00000000..2f63896a --- /dev/null +++ b/tools/external/googletest/ci/build-linux-bazel.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. 
nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set -e + +bazel build --curses=no //...:all +bazel test --curses=no //...:all diff --git a/tools/external/googletest/ci/env-linux.sh b/tools/external/googletest/ci/env-linux.sh new file mode 100755 index 00000000..9086b1f9 --- /dev/null +++ b/tools/external/googletest/ci/env-linux.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. 
nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# +# This file should be sourced, and not executed as a standalone script. +# + +# TODO() - we can check if this is being sourced using $BASH_VERSION and $BASH_SOURCE[0] != ${0}. + +if [ "${TRAVIS_OS_NAME}" = "linux" ]; then + if [ "$CXX" = "g++" ]; then export CXX="g++-4.9" CC="gcc-4.9"; fi + if [ "$CXX" = "clang++" ]; then export CXX="clang++-3.7" CC="clang-3.7"; fi +fi diff --git a/tools/external/googletest/ci/env-osx.sh b/tools/external/googletest/ci/env-osx.sh new file mode 100755 index 00000000..31c88357 --- /dev/null +++ b/tools/external/googletest/ci/env-osx.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# +# This file should be sourced, and not executed as a standalone script. +# + +# TODO() - we can check if this is being sourced using $BASH_VERSION and $BASH_SOURCE[0] != ${0}. + +if [ "${TRAVIS_OS_NAME}" = "linux" ]; then + if [ "$CXX" = "clang++" ]; then export CXX="clang++-3.7" CC="clang-3.7"; fi +fi diff --git a/tools/external/googletest/ci/install-linux.sh b/tools/external/googletest/ci/install-linux.sh new file mode 100755 index 00000000..02a19439 --- /dev/null +++ b/tools/external/googletest/ci/install-linux.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. 
+# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +set -eu + +if [ "${TRAVIS_OS_NAME}" != linux ]; then + echo "Not a Linux build; skipping installation" + exit 0 +fi + + +if [ "${TRAVIS_SUDO}" = "true" ]; then + echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | \ + sudo tee /etc/apt/sources.list.d/bazel.list + curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add - + sudo apt-get update && sudo apt-get install -y bazel gcc-4.9 g++-4.9 clang-3.7 +elif [ "${CXX}" = "clang++" ]; then + # Use ccache, assuming $HOME/bin is in the path, which is true in the Travis build environment. + ln -sf /usr/bin/ccache $HOME/bin/${CXX}; + ln -sf /usr/bin/ccache $HOME/bin/${CC}; +fi diff --git a/tools/external/googletest/ci/install-osx.sh b/tools/external/googletest/ci/install-osx.sh new file mode 100755 index 00000000..6550ff51 --- /dev/null +++ b/tools/external/googletest/ci/install-osx.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set -eu + +if [ "${TRAVIS_OS_NAME}" != "osx" ]; then + echo "Not a macOS build; skipping installation" + exit 0 +fi + +brew install ccache diff --git a/tools/external/googletest/ci/log-config.sh b/tools/external/googletest/ci/log-config.sh new file mode 100755 index 00000000..5fef1194 --- /dev/null +++ b/tools/external/googletest/ci/log-config.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set -e + +# ccache on OS X needs installation first +# reset ccache statistics +ccache --zero-stats + +echo PATH=${PATH} + +echo "Compiler configuration:" +echo CXX=${CXX} +echo CC=${CC} +echo CXXFLAGS=${CXXFLAGS} + +echo "C++ compiler version:" +${CXX} --version || echo "${CXX} does not seem to support the --version flag" +${CXX} -v || echo "${CXX} does not seem to support the -v flag" + +echo "C compiler version:" +${CC} --version || echo "${CC} does not seem to support the --version flag" +${CC} -v || echo "${CC} does not seem to support the -v flag" diff --git a/tools/external/googletest/ci/travis.sh b/tools/external/googletest/ci/travis.sh new file mode 100755 index 00000000..24a557e9 --- /dev/null +++ b/tools/external/googletest/ci/travis.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env sh +set -evx + +# if possible, ask for the precise number of processors, +# otherwise take 2 processors as reasonable default; see +# https://docs.travis-ci.com/user/speeding-up-the-build/#Makefile-optimization +if [ -x /usr/bin/getconf ]; then + NPROCESSORS=$(/usr/bin/getconf _NPROCESSORS_ONLN) +else + NPROCESSORS=2 +fi +# as of 2017-09-04 Travis CI reports 32 processors, but GCC build +# crashes if parallelized too much (maybe memory consumption problem), +# so limit to 4 processors for the time being. +if [ $NPROCESSORS -gt 4 ] ; then + echo "$0:Note: Limiting processors to use by make from $NPROCESSORS to 4."
+ NPROCESSORS=4 +fi +# Tell make to use the processors. No preceding '-' required. +MAKEFLAGS="j${NPROCESSORS}" +export MAKEFLAGS + +env | sort + +mkdir build || true +cd build +cmake -Dgtest_build_samples=ON \ + -Dgtest_build_tests=ON \ + -Dgmock_build_tests=ON \ + -DCMAKE_CXX_FLAGS=$CXX_FLAGS \ + -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ + .. +make +CTEST_OUTPUT_ON_FAILURE=1 make test diff --git a/tools/external/googletest/googlemock/CHANGES b/tools/external/googletest/googlemock/CHANGES new file mode 100644 index 00000000..4328ece3 --- /dev/null +++ b/tools/external/googletest/googlemock/CHANGES @@ -0,0 +1,126 @@ +Changes for 1.7.0: + +* All new improvements in Google Test 1.7.0. +* New feature: matchers DoubleNear(), FloatNear(), + NanSensitiveDoubleNear(), NanSensitiveFloatNear(), + UnorderedElementsAre(), UnorderedElementsAreArray(), WhenSorted(), + WhenSortedBy(), IsEmpty(), and SizeIs(). +* Improvement: Google Mock can now be built as a DLL. +* Improvement: when compiled by a C++11 compiler, matchers AllOf() + and AnyOf() can accept an arbitrary number of matchers. +* Improvement: when compiled by a C++11 compiler, matchers + ElementsAreArray() can accept an initializer list. +* Improvement: when exceptions are enabled, a mock method with no + default action now throws instead of crashing the test. +* Improvement: added class testing::StringMatchResultListener to aid + definition of composite matchers. +* Improvement: function return types used in MOCK_METHOD*() macros can + now contain unprotected commas. +* Improvement (potentially breaking): EXPECT_THAT() and ASSERT_THAT() + are now more strict in ensuring that the value type and the matcher + type are compatible, catching potential bugs in tests. +* Improvement: Pointee() now works on an optional. +* Improvement: the ElementsAreArray() matcher can now take a vector or + iterator range as input, and makes a copy of its input elements + before the conversion to a Matcher.
+* Improvement: the Google Mock Generator can now generate mocks for + some class templates. +* Bug fix: mock object destruction triggered by another mock object's + destruction no longer hangs. +* Improvement: Google Mock Doctor works better with newer Clang and + GCC now. +* Compatibility fixes. +* Bug/warning fixes. + +Changes for 1.6.0: + +* Compilation is much faster and uses much less memory, especially + when the constructor and destructor of a mock class are moved out of + the class body. +* New matchers: Pointwise(), Each(). +* New actions: ReturnPointee() and ReturnRefOfCopy(). +* CMake support. +* Project files for Visual Studio 2010. +* AllOf() and AnyOf() can handle up to 10 arguments now. +* Google Mock doctor understands Clang error messages now. +* SetArgPointee<> now accepts string literals. +* gmock_gen.py handles storage specifier macros and template return + types now. +* Compatibility fixes. +* Bug fixes and implementation clean-ups. +* Potentially incompatible changes: disables the harmful 'make install' + command in autotools. + +Potentially breaking changes: + +* The description string for MATCHER*() changes from Python-style + interpolation to an ordinary C++ string expression. +* SetArgumentPointee is deprecated in favor of SetArgPointee. +* Some non-essential project files for Visual Studio 2005 are removed. + +Changes for 1.5.0: + + * New feature: Google Mock can be safely used in multi-threaded tests + on platforms having pthreads. + * New feature: function for printing a value of arbitrary type. + * New feature: function ExplainMatchResult() for easy definition of + composite matchers. + * The new matcher API lets user-defined matchers generate custom + explanations more directly and efficiently. + * Better failure messages all around. + * NotNull() and IsNull() now work with smart pointers. + * Field() and Property() now work when the matcher argument is a pointer + passed by reference.
+ * Regular expression matchers on all platforms. + * Added GCC 4.0 support for Google Mock Doctor. + * Added gmock_all_test.cc for compiling most Google Mock tests + in a single file. + * Significantly cleaned up compiler warnings. + * Bug fixes, better test coverage, and implementation clean-ups. + + Potentially breaking changes: + + * Custom matchers defined using MatcherInterface or MakePolymorphicMatcher() + need to be updated after upgrading to Google Mock 1.5.0; matchers defined + using MATCHER or MATCHER_P* aren't affected. + * Dropped support for 'make install'. + +Changes for 1.4.0 (we skipped 1.2.* and 1.3.* to match the version of +Google Test): + + * Works in more environments: Symbian and minGW, Visual C++ 7.1. + * Lighter weight: comes with our own implementation of TR1 tuple (no + more dependency on Boost!). + * New feature: --gmock_catch_leaked_mocks for detecting leaked mocks. + * New feature: ACTION_TEMPLATE for defining templatized actions. + * New feature: the .After() clause for specifying expectation order. + * New feature: the .With() clause for specifying inter-argument + constraints. + * New feature: actions ReturnArg(), ReturnNew(...), and + DeleteArg(). + * New feature: matchers Key(), Pair(), Args<...>(), AllArgs(), IsNull(), + and Contains(). + * New feature: utility class MockFunction, useful for checkpoints, etc. + * New feature: functions Value(x, m) and SafeMatcherCast(m). + * New feature: copying a mock object is rejected at compile time. + * New feature: a script for fusing all Google Mock and Google Test + source files for easy deployment. + * Improved the Google Mock doctor to diagnose more diseases. + * Improved the Google Mock generator script. + * Compatibility fixes for Mac OS X and gcc. + * Bug fixes and implementation clean-ups. + +Changes for 1.1.0: + + * New feature: ability to use Google Mock with any testing framework. 
+ * New feature: macros for easily defining new matchers + * New feature: macros for easily defining new actions. + * New feature: more container matchers. + * New feature: actions for accessing function arguments and throwing + exceptions. + * Improved the Google Mock doctor script for diagnosing compiler errors. + * Bug fixes and implementation clean-ups. + +Changes for 1.0.0: + + * Initial Open Source release of Google Mock diff --git a/tools/external/googletest/googlemock/CMakeLists.txt b/tools/external/googletest/googlemock/CMakeLists.txt new file mode 100644 index 00000000..ead51bf3 --- /dev/null +++ b/tools/external/googletest/googlemock/CMakeLists.txt @@ -0,0 +1,225 @@ +######################################################################## +# CMake build script for Google Mock. +# +# To run the tests for Google Mock itself on Linux, use 'make test' or +# ctest. You can select which tests to run using 'ctest -R regex'. +# For more options, run 'ctest --help'. + +# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to +# make it prominent in the GUI. +option(BUILD_SHARED_LIBS "Build shared libraries (DLLs)." OFF) + +option(gmock_build_tests "Build all of Google Mock's own tests." OFF) + +# A directory to find Google Test sources. +if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/gtest/CMakeLists.txt") + set(gtest_dir gtest) +else() + set(gtest_dir ../googletest) +endif() + +# Defines pre_project_set_up_hermetic_build() and set_up_hermetic_build(). +include("${gtest_dir}/cmake/hermetic_build.cmake" OPTIONAL) + +if (COMMAND pre_project_set_up_hermetic_build) + # Google Test also calls hermetic setup functions from add_subdirectory, + # although its changes will not affect things at the current scope. + pre_project_set_up_hermetic_build() +endif() + +######################################################################## +# +# Project-wide settings + +# Name of the project. 
+# +# CMake files in this project can refer to the root source directory +# as ${gmock_SOURCE_DIR} and to the root binary directory as +# ${gmock_BINARY_DIR}. +# Language "C" is required for find_package(Threads). +if (CMAKE_VERSION VERSION_LESS 3.0) + project(gmock CXX C) +else() + cmake_policy(SET CMP0048 NEW) + project(gmock VERSION 1.9.0 LANGUAGES CXX C) +endif() +cmake_minimum_required(VERSION 2.6.4) + +if (COMMAND set_up_hermetic_build) + set_up_hermetic_build() +endif() + +# Instructs CMake to process Google Test's CMakeLists.txt and add its +# targets to the current scope. We are placing Google Test's binary +# directory in a subdirectory of our own as VC compilation may break +# if they are the same (the default). +add_subdirectory("${gtest_dir}" "${gmock_BINARY_DIR}/gtest") + +# Although Google Test's CMakeLists.txt calls this function, the +# changes there don't affect the current scope. Therefore we have to +# call it again here. +config_compiler_and_linker() # from ${gtest_dir}/cmake/internal_utils.cmake + +# Adds Google Mock's and Google Test's header directories to the search path. +include_directories("${gmock_SOURCE_DIR}/include" + "${gmock_SOURCE_DIR}" + "${gtest_SOURCE_DIR}/include" + # This directory is needed to build directly from Google + # Test sources. + "${gtest_SOURCE_DIR}") + +# Summary of tuple support for Microsoft Visual Studio: +# Compiler version(MS) version(cmake) Support +# ---------- ----------- -------------- ----------------------------- +# <= VS 2010 <= 10 <= 1600 Use Google Tests's own tuple. +# VS 2012 11 1700 std::tr1::tuple + _VARIADIC_MAX=10 +# VS 2013 12 1800 std::tr1::tuple +# VS 2015 14 1900 std::tuple +# VS 2017 15 >= 1910 std::tuple +if (MSVC AND MSVC_VERSION EQUAL 1700) + add_definitions(/D _VARIADIC_MAX=10) +endif() + +######################################################################## +# +# Defines the gmock & gmock_main libraries. User tests should link +# with one of them. + +# Google Mock libraries. 
We build them using more strict warnings than what +# are used for other targets, to ensure that Google Mock can be compiled by +# a user aggressive about warnings. +cxx_library(gmock + "${cxx_strict}" + "${gtest_dir}/src/gtest-all.cc" + src/gmock-all.cc) + +cxx_library(gmock_main + "${cxx_strict}" + "${gtest_dir}/src/gtest-all.cc" + src/gmock-all.cc + src/gmock_main.cc) + +# If the CMake version supports it, attach header directory information +# to the targets for when we are part of a parent build (ie being pulled +# in via add_subdirectory() rather than being a standalone build). +if (DEFINED CMAKE_VERSION AND NOT "${CMAKE_VERSION}" VERSION_LESS "2.8.11") + target_include_directories(gmock SYSTEM INTERFACE "${gmock_SOURCE_DIR}/include") + target_include_directories(gmock_main SYSTEM INTERFACE "${gmock_SOURCE_DIR}/include") +endif() + +######################################################################## +# +# Install rules +if(INSTALL_GMOCK) + install(TARGETS gmock gmock_main + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(DIRECTORY ${gmock_SOURCE_DIR}/include/gmock + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + + # configure and install pkgconfig files + configure_file( + cmake/gmock.pc.in + "${CMAKE_BINARY_DIR}/gmock.pc" + @ONLY) + configure_file( + cmake/gmock_main.pc.in + "${CMAKE_BINARY_DIR}/gmock_main.pc" + @ONLY) + install(FILES "${CMAKE_BINARY_DIR}/gmock.pc" "${CMAKE_BINARY_DIR}/gmock_main.pc" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") +endif() + +######################################################################## +# +# Google Mock's own tests. +# +# You can skip this section if you aren't interested in testing +# Google Mock itself. +# +# The tests are not built by default. To build them, set the +# gmock_build_tests option to ON. You can do it by running ccmake +# or specifying the -Dgmock_build_tests=ON flag when running cmake. 
+ +if (gmock_build_tests) + # This must be set in the root directory for the tests to be run by + # 'make test' or ctest. + enable_testing() + + ############################################################ + # C++ tests built with standard compiler flags. + + cxx_test(gmock-actions_test gmock_main) + cxx_test(gmock-cardinalities_test gmock_main) + cxx_test(gmock_ex_test gmock_main) + cxx_test(gmock-generated-actions_test gmock_main) + cxx_test(gmock-generated-function-mockers_test gmock_main) + cxx_test(gmock-generated-internal-utils_test gmock_main) + cxx_test(gmock-generated-matchers_test gmock_main) + cxx_test(gmock-internal-utils_test gmock_main) + cxx_test(gmock-matchers_test gmock_main) + cxx_test(gmock-more-actions_test gmock_main) + cxx_test(gmock-nice-strict_test gmock_main) + cxx_test(gmock-port_test gmock_main) + cxx_test(gmock-spec-builders_test gmock_main) + cxx_test(gmock_link_test gmock_main test/gmock_link2_test.cc) + cxx_test(gmock_test gmock_main) + + if (DEFINED GTEST_HAS_PTHREAD) + cxx_test(gmock_stress_test gmock) + endif() + + # gmock_all_test is commented to save time building and running tests. + # Uncomment if necessary. + # cxx_test(gmock_all_test gmock_main) + + ############################################################ + # C++ tests built with non-standard compiler flags. + + cxx_library(gmock_main_no_exception "${cxx_no_exception}" + "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc src/gmock_main.cc) + + cxx_library(gmock_main_no_rtti "${cxx_no_rtti}" + "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc src/gmock_main.cc) + + if (NOT MSVC OR MSVC_VERSION LESS 1600) # 1600 is Visual Studio 2010. + # Visual Studio 2010, 2012, and 2013 define symbols in std::tr1 that + # conflict with our own definitions. Therefore using our own tuple does not + # work on those compilers. 
+ cxx_library(gmock_main_use_own_tuple "${cxx_use_own_tuple}" + "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc src/gmock_main.cc) + + cxx_test_with_flags(gmock_use_own_tuple_test "${cxx_use_own_tuple}" + gmock_main_use_own_tuple test/gmock-spec-builders_test.cc) + endif() + + cxx_test_with_flags(gmock-more-actions_no_exception_test "${cxx_no_exception}" + gmock_main_no_exception test/gmock-more-actions_test.cc) + + cxx_test_with_flags(gmock_no_rtti_test "${cxx_no_rtti}" + gmock_main_no_rtti test/gmock-spec-builders_test.cc) + + cxx_shared_library(shared_gmock_main "${cxx_default}" + "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc src/gmock_main.cc) + + # Tests that a binary can be built with Google Mock as a shared library. On + # some system configurations, it may not be possible to run the binary without + # knowing more details about the system configurations. We do not try to run + # this binary. To get a more robust shared library coverage, configure with + # -DBUILD_SHARED_LIBS=ON. + cxx_executable_with_flags(shared_gmock_test_ "${cxx_default}" + shared_gmock_main test/gmock-spec-builders_test.cc) + set_target_properties(shared_gmock_test_ + PROPERTIES + COMPILE_DEFINITIONS "GTEST_LINKED_AS_SHARED_LIBRARY=1") + + ############################################################ + # Python tests. + + cxx_executable(gmock_leak_test_ test gmock_main) + py_test(gmock_leak_test) + + cxx_executable(gmock_output_test_ test gmock) + py_test(gmock_output_test) +endif() diff --git a/tools/external/googletest/googlemock/CONTRIBUTORS b/tools/external/googletest/googlemock/CONTRIBUTORS new file mode 100644 index 00000000..6e9ae362 --- /dev/null +++ b/tools/external/googletest/googlemock/CONTRIBUTORS @@ -0,0 +1,40 @@ +# This file contains a list of people who've made non-trivial +# contribution to the Google C++ Mocking Framework project. People +# who commit code to the project are encouraged to add their names +# here. Please keep the list sorted by first names.
+ +Benoit Sigoure +Bogdan Piloca +Chandler Carruth +Dave MacLachlan +David Anderson +Dean Sturtevant +Gene Volovich +Hal Burch +Jeffrey Yasskin +Jim Keller +Joe Walnes +Jon Wray +Keir Mierle +Keith Ray +Kostya Serebryany +Lev Makhlis +Manuel Klimek +Mario Tanev +Mark Paskin +Markus Heule +Matthew Simmons +Mike Bland +Neal Norwitz +Nermin Ozkiranartli +Owen Carlsen +Paneendra Ba +Paul Menage +Piotr Kaminski +Russ Rufer +Sverre Sundsdal +Takeshi Yoshino +Vadim Berman +Vlad Losev +Wolfgang Klier +Zhanyong Wan diff --git a/tools/external/googletest/googlemock/LICENSE b/tools/external/googletest/googlemock/LICENSE new file mode 100644 index 00000000..1941a11f --- /dev/null +++ b/tools/external/googletest/googlemock/LICENSE @@ -0,0 +1,28 @@ +Copyright 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/tools/external/googletest/googlemock/Makefile.am b/tools/external/googletest/googlemock/Makefile.am new file mode 100644 index 00000000..9adbc516 --- /dev/null +++ b/tools/external/googletest/googlemock/Makefile.am @@ -0,0 +1,224 @@ +# Automake file + +# Nonstandard package files for distribution. +EXTRA_DIST = LICENSE + +# We may need to build our internally packaged gtest. If so, it will be +# included in the 'subdirs' variable. +SUBDIRS = $(subdirs) + +# This is generated by the configure script, so clean it for distribution. +DISTCLEANFILES = scripts/gmock-config + +# We define the global AM_CPPFLAGS as everything we compile includes from these +# directories. +AM_CPPFLAGS = $(GTEST_CPPFLAGS) -I$(srcdir)/include + +# Modifies compiler and linker flags for pthreads compatibility. +if HAVE_PTHREADS + AM_CXXFLAGS = @PTHREAD_CFLAGS@ -DGTEST_HAS_PTHREAD=1 + AM_LIBS = @PTHREAD_LIBS@ +endif + +# Build rules for libraries. 
+lib_LTLIBRARIES = lib/libgmock.la lib/libgmock_main.la + +lib_libgmock_la_SOURCES = src/gmock-all.cc + +pkginclude_HEADERS = \ + include/gmock/gmock-actions.h \ + include/gmock/gmock-cardinalities.h \ + include/gmock/gmock-generated-actions.h \ + include/gmock/gmock-generated-function-mockers.h \ + include/gmock/gmock-generated-matchers.h \ + include/gmock/gmock-generated-nice-strict.h \ + include/gmock/gmock-matchers.h \ + include/gmock/gmock-more-actions.h \ + include/gmock/gmock-more-matchers.h \ + include/gmock/gmock-spec-builders.h \ + include/gmock/gmock.h + +pkginclude_internaldir = $(pkgincludedir)/internal +pkginclude_internal_HEADERS = \ + include/gmock/internal/gmock-generated-internal-utils.h \ + include/gmock/internal/gmock-internal-utils.h \ + include/gmock/internal/gmock-port.h \ + include/gmock/internal/custom/gmock-generated-actions.h \ + include/gmock/internal/custom/gmock-matchers.h \ + include/gmock/internal/custom/gmock-port.h + +lib_libgmock_main_la_SOURCES = src/gmock_main.cc +lib_libgmock_main_la_LIBADD = lib/libgmock.la + +# Build rules for tests. Automake's naming for some of these variables isn't +# terribly obvious, so this is a brief reference: +# +# TESTS -- Programs run automatically by "make check" +# check_PROGRAMS -- Programs built by "make check" but not necessarily run + +TESTS= +check_PROGRAMS= +AM_LDFLAGS = $(GTEST_LDFLAGS) + +# This exercises all major components of Google Mock. It also +# verifies that libgmock works. +TESTS += test/gmock-spec-builders_test +check_PROGRAMS += test/gmock-spec-builders_test +test_gmock_spec_builders_test_SOURCES = test/gmock-spec-builders_test.cc +test_gmock_spec_builders_test_LDADD = $(GTEST_LIBS) lib/libgmock.la + +# This tests using Google Mock in multiple translation units. It also +# verifies that libgmock_main and libgmock work. 
+TESTS += test/gmock_link_test +check_PROGRAMS += test/gmock_link_test +test_gmock_link_test_SOURCES = \ + test/gmock_link2_test.cc \ + test/gmock_link_test.cc \ + test/gmock_link_test.h +test_gmock_link_test_LDADD = $(GTEST_LIBS) lib/libgmock_main.la lib/libgmock.la + +if HAVE_PYTHON + # Tests that fused gmock files compile and work. + TESTS += test/gmock_fused_test + check_PROGRAMS += test/gmock_fused_test + test_gmock_fused_test_SOURCES = \ + fused-src/gmock-gtest-all.cc \ + fused-src/gmock/gmock.h \ + fused-src/gmock_main.cc \ + fused-src/gtest/gtest.h \ + test/gmock_test.cc + test_gmock_fused_test_CPPFLAGS = -I"$(srcdir)/fused-src" +endif + +# Google Mock source files that we don't compile directly. +GMOCK_SOURCE_INGLUDES = \ + src/gmock-cardinalities.cc \ + src/gmock-internal-utils.cc \ + src/gmock-matchers.cc \ + src/gmock-spec-builders.cc \ + src/gmock.cc + +EXTRA_DIST += $(GMOCK_SOURCE_INGLUDES) + +# C++ tests that we don't compile using autotools. +EXTRA_DIST += \ + test/gmock-actions_test.cc \ + test/gmock_all_test.cc \ + test/gmock-cardinalities_test.cc \ + test/gmock_ex_test.cc \ + test/gmock-generated-actions_test.cc \ + test/gmock-generated-function-mockers_test.cc \ + test/gmock-generated-internal-utils_test.cc \ + test/gmock-generated-matchers_test.cc \ + test/gmock-internal-utils_test.cc \ + test/gmock-matchers_test.cc \ + test/gmock-more-actions_test.cc \ + test/gmock-nice-strict_test.cc \ + test/gmock-port_test.cc \ + test/gmock_stress_test.cc + +# Python tests, which we don't run using autotools. +EXTRA_DIST += \ + test/gmock_leak_test.py \ + test/gmock_leak_test_.cc \ + test/gmock_output_test.py \ + test/gmock_output_test_.cc \ + test/gmock_output_test_golden.txt \ + test/gmock_test_utils.py + +# Nonstandard package files for distribution. +EXTRA_DIST += \ + CHANGES \ + CONTRIBUTORS \ + make/Makefile + +# Pump scripts for generating Google Mock headers. +# TODO(chandlerc@google.com): automate the generation of *.h from *.h.pump. 
+EXTRA_DIST += \ + include/gmock/gmock-generated-actions.h.pump \ + include/gmock/gmock-generated-function-mockers.h.pump \ + include/gmock/gmock-generated-matchers.h.pump \ + include/gmock/gmock-generated-nice-strict.h.pump \ + include/gmock/internal/gmock-generated-internal-utils.h.pump \ + include/gmock/internal/custom/gmock-generated-actions.h.pump + +# Script for fusing Google Mock and Google Test source files. +EXTRA_DIST += scripts/fuse_gmock_files.py + +# The Google Mock Generator tool from the cppclean project. +EXTRA_DIST += \ + scripts/generator/LICENSE \ + scripts/generator/README \ + scripts/generator/README.cppclean \ + scripts/generator/cpp/__init__.py \ + scripts/generator/cpp/ast.py \ + scripts/generator/cpp/gmock_class.py \ + scripts/generator/cpp/keywords.py \ + scripts/generator/cpp/tokenize.py \ + scripts/generator/cpp/utils.py \ + scripts/generator/gmock_gen.py + +# Script for diagnosing compiler errors in programs that use Google +# Mock. +EXTRA_DIST += scripts/gmock_doctor.py + +# CMake scripts. +EXTRA_DIST += \ + CMakeLists.txt + +# Microsoft Visual Studio 2005 projects. +EXTRA_DIST += \ + msvc/2005/gmock.sln \ + msvc/2005/gmock.vcproj \ + msvc/2005/gmock_config.vsprops \ + msvc/2005/gmock_main.vcproj \ + msvc/2005/gmock_test.vcproj + +# Microsoft Visual Studio 2010 projects. +EXTRA_DIST += \ + msvc/2010/gmock.sln \ + msvc/2010/gmock.vcxproj \ + msvc/2010/gmock_config.props \ + msvc/2010/gmock_main.vcxproj \ + msvc/2010/gmock_test.vcxproj + +if HAVE_PYTHON +# gmock_test.cc does not really depend on files generated by the +# fused-gmock-internal rule. However, gmock_test.o does, and it is +# important to include test/gmock_test.cc as part of this rule in order to +# prevent compiling gmock_test.o until all dependent files have been +# generated. +$(test_gmock_fused_test_SOURCES): fused-gmock-internal + +# TODO(vladl@google.com): Find a way to add Google Tests's sources here. 
+fused-gmock-internal: $(pkginclude_HEADERS) $(pkginclude_internal_HEADERS) \ + $(lib_libgmock_la_SOURCES) $(GMOCK_SOURCE_INGLUDES) \ + $(lib_libgmock_main_la_SOURCES) \ + scripts/fuse_gmock_files.py + mkdir -p "$(srcdir)/fused-src" + chmod -R u+w "$(srcdir)/fused-src" + rm -f "$(srcdir)/fused-src/gtest/gtest.h" + rm -f "$(srcdir)/fused-src/gmock/gmock.h" + rm -f "$(srcdir)/fused-src/gmock-gtest-all.cc" + "$(srcdir)/scripts/fuse_gmock_files.py" "$(srcdir)/fused-src" + cp -f "$(srcdir)/src/gmock_main.cc" "$(srcdir)/fused-src" + +maintainer-clean-local: + rm -rf "$(srcdir)/fused-src" +endif + +# Death tests may produce core dumps in the build directory. In case +# this happens, clean them to keep distcleancheck happy. +CLEANFILES = core + +# Disables 'make install' as installing a compiled version of Google +# Mock can lead to undefined behavior due to violation of the +# One-Definition Rule. + +install-exec-local: + echo "'make install' is dangerous and not supported. Instead, see README for how to integrate Google Mock into your build system." + false + +install-data-local: + echo "'make install' is dangerous and not supported. Instead, see README for how to integrate Google Mock into your build system." + false diff --git a/tools/external/googletest/googlemock/README.md b/tools/external/googletest/googlemock/README.md new file mode 100644 index 00000000..f941f158 --- /dev/null +++ b/tools/external/googletest/googlemock/README.md @@ -0,0 +1,376 @@ +## Google Mock ## + +The Google C++ mocking framework. + +### Overview ### + +Google's framework for writing and using C++ mock classes. +It can help you derive better designs of your system and write better tests. + +It is inspired by: + + * [jMock](http://www.jmock.org/), + * [EasyMock](http://www.easymock.org/), and + * [Hamcrest](http://code.google.com/p/hamcrest/), + +and designed with C++'s specifics in mind. + +Google mock: + + * lets you create mock classes trivially using simple macros. 
+ * supports a rich set of matchers and actions. + * handles unordered, partially ordered, or completely ordered expectations. + * is extensible by users. + +We hope you find it useful! + +### Features ### + + * Provides a declarative syntax for defining mocks. + * Can easily define partial (hybrid) mocks, which are a cross of real + and mock objects. + * Handles functions of arbitrary types and overloaded functions. + * Comes with a rich set of matchers for validating function arguments. + * Uses an intuitive syntax for controlling the behavior of a mock. + * Does automatic verification of expectations (no record-and-replay needed). + * Allows arbitrary (partial) ordering constraints on + function calls to be expressed. + * Lets a user extend it by defining new matchers and actions. + * Does not use exceptions. + * Is easy to learn and use. + +Please see the project page above for more information as well as the +mailing list for questions, discussions, and development. There is +also an IRC channel on OFTC (irc.oftc.net) #gtest available. Please +join us! + +Please note that code under [scripts/generator](scripts/generator/) is +from [cppclean](http://code.google.com/p/cppclean/) and released under +the Apache License, which is different from Google Mock's license. + +## Getting Started ## + +If you are new to the project, we suggest that you read the user +documentation in the following order: + + * Learn the [basics](../../master/googletest/docs/Primer.md) of + Google Test, if you choose to use Google Mock with it (recommended). + * Read [Google Mock for Dummies](../../master/googlemock/docs/ForDummies.md). + * Read the instructions below on how to build Google Mock. + +You can also watch Zhanyong's [talk](http://www.youtube.com/watch?v=sYpCyLI47rM) on Google Mock's usage and implementation.
+ +Once you understand the basics, check out the rest of the docs: + + * [CheatSheet](../../master/googlemock/docs/CheatSheet.md) - all the commonly used stuff + at a glance. + * [CookBook](../../master/googlemock/docs/CookBook.md) - recipes for getting things done, + including advanced techniques. + +If you need help, please check the +[KnownIssues](docs/KnownIssues.md) and +[FrequentlyAskedQuestions](docs/FrequentlyAskedQuestions.md) before +posting a question on the +[discussion group](http://groups.google.com/group/googlemock). + + +### Using Google Mock Without Google Test ### + +Google Mock is not a testing framework itself. Instead, it needs a +testing framework for writing tests. Google Mock works seamlessly +with [Google Test](https://github.com/google/googletest), but +you can also use it with [any C++ testing framework](../../master/googlemock/docs/ForDummies.md#using-google-mock-with-any-testing-framework). + +### Requirements for End Users ### + +Google Mock is implemented on top of [Google Test]( +http://github.com/google/googletest/), and depends on it. +You must use the bundled version of Google Test when using Google Mock. + +You can also easily configure Google Mock to work with another testing +framework, although it will still need Google Test. Please read +["Using_Google_Mock_with_Any_Testing_Framework"]( + ../../master/googlemock/docs/ForDummies.md#using-google-mock-with-any-testing-framework) +for instructions. + +Google Mock depends on advanced C++ features and thus requires a more +modern compiler. The following are needed to use Google Mock: + +#### Linux Requirements #### + + * GNU-compatible Make or "gmake" + * POSIX-standard shell + * POSIX(-2) Regular Expressions (regex.h) + * C++98-standard-compliant compiler (e.g. 
GCC 3.4 or newer) + +#### Windows Requirements #### + + * Microsoft Visual C++ 8.0 SP1 or newer + +#### Mac OS X Requirements #### + + * Mac OS X 10.4 Tiger or newer + * Developer Tools Installed + +### Requirements for Contributors ### + +We welcome patches. If you plan to contribute a patch, you need to +build Google Mock and its tests, which has further requirements: + + * Automake version 1.9 or newer + * Autoconf version 2.59 or newer + * Libtool / Libtoolize + * Python version 2.3 or newer (for running some of the tests and + re-generating certain source files from templates) + +### Building Google Mock ### + +#### Using CMake #### + +If you have CMake available, it is recommended that you follow the +[build instructions][gtest_cmakebuild] +as described for Google Test. + +If you are using Google Mock with an +existing CMake project, the section +[Incorporating Into An Existing CMake Project][gtest_incorpcmake] +may be of particular interest. +To make it work for Google Mock you will need to change + + target_link_libraries(example gtest_main) + +to + + target_link_libraries(example gmock_main) + +This works because the `gmock_main` library is compiled with Google Test. +However, it does not automatically add Google Test includes. +Therefore you will also have to change + + if (CMAKE_VERSION VERSION_LESS 2.8.11) + include_directories("${gtest_SOURCE_DIR}/include") + endif() + +to + + if (CMAKE_VERSION VERSION_LESS 2.8.11) + include_directories(BEFORE SYSTEM + "${gtest_SOURCE_DIR}/include" "${gmock_SOURCE_DIR}/include") + else() + target_include_directories(gmock_main SYSTEM BEFORE INTERFACE + "${gtest_SOURCE_DIR}/include" "${gmock_SOURCE_DIR}/include") + endif() + +This will additionally mark Google Mock includes as system, which will +silence compiler warnings when compiling your tests using clang with +`-Wpedantic -Wall -Wextra -Wconversion`.
+ + +#### Preparing to Build (Unix only) #### + +If you are using a Unix system and plan to use the GNU Autotools build +system to build Google Mock (described below), you'll need to +configure it now. + +To prepare the Autotools build system: + + cd googlemock + autoreconf -fvi + +To build Google Mock and your tests that use it, you need to tell your +build system where to find its headers and source files. The exact +way to do it depends on which build system you use, and is usually +straightforward. + +This section shows how you can integrate Google Mock into your +existing build system. + +Suppose you put Google Mock in directory `${GMOCK_DIR}` and Google Test +in `${GTEST_DIR}` (the latter is `${GMOCK_DIR}/gtest` by default). To +build Google Mock, create a library build target (or a project as +called by Visual Studio and Xcode) to compile + + ${GTEST_DIR}/src/gtest-all.cc and ${GMOCK_DIR}/src/gmock-all.cc + +with + + ${GTEST_DIR}/include and ${GMOCK_DIR}/include + +in the system header search path, and + + ${GTEST_DIR} and ${GMOCK_DIR} + +in the normal header search path. Assuming a Linux-like system and gcc, +something like the following will do: + + g++ -isystem ${GTEST_DIR}/include -I${GTEST_DIR} \ + -isystem ${GMOCK_DIR}/include -I${GMOCK_DIR} \ + -pthread -c ${GTEST_DIR}/src/gtest-all.cc + g++ -isystem ${GTEST_DIR}/include -I${GTEST_DIR} \ + -isystem ${GMOCK_DIR}/include -I${GMOCK_DIR} \ + -pthread -c ${GMOCK_DIR}/src/gmock-all.cc + ar -rv libgmock.a gtest-all.o gmock-all.o + +(We need -pthread as Google Test and Google Mock use threads.) 
+ +Next, you should compile your test source file with +${GTEST\_DIR}/include and ${GMOCK\_DIR}/include in the header search +path, and link it with gmock and any other necessary libraries: + + g++ -isystem ${GTEST_DIR}/include -isystem ${GMOCK_DIR}/include \ + -pthread path/to/your_test.cc libgmock.a -o your_test + +As an example, the make/ directory contains a Makefile that you can +use to build Google Mock on systems where GNU make is available +(e.g. Linux, Mac OS X, and Cygwin). It doesn't try to build Google +Mock's own tests. Instead, it just builds the Google Mock library and +a sample test. You can use it as a starting point for your own build +script. + +If the default settings are correct for your environment, the +following commands should succeed: + + cd ${GMOCK_DIR}/make + make + ./gmock_test + +If you see errors, try to tweak the contents of +[make/Makefile](make/Makefile) to make them go away. + +### Windows ### + +The msvc/2005 directory contains VC++ 2005 projects and the msvc/2010 +directory contains VC++ 2010 projects for building Google Mock and +selected tests. + +Change to the appropriate directory and run "msbuild gmock.sln" to +build the library and tests (or open the gmock.sln in the MSVC IDE). +If you want to create your own project to use with Google Mock, you'll +have to configure it to use the `gmock_config` property sheet. For that: + + * Open the Property Manager window (View | Other Windows | Property Manager) + * Right-click on your project and select "Add Existing Property Sheet..." + * Navigate to `gmock_config.vsprops` or `gmock_config.props` and select it. + * In Project Properties | Configuration Properties | General | Additional + Include Directories, type `<path to Google Mock>/include`. + +### Tweaking Google Mock ### + +Google Mock can be used in diverse environments. The default +configuration may not work (or may not work well) out of the box in +some environments.
However, you can easily tweak Google Mock by +defining control macros on the compiler command line. Generally, +these macros are named like `GTEST_XYZ` and you define them to either 1 +or 0 to enable or disable a certain feature. + +We list the most frequently used macros below. For a complete list, +see file [${GTEST\_DIR}/include/gtest/internal/gtest-port.h]( +../googletest/include/gtest/internal/gtest-port.h). + +### Choosing a TR1 Tuple Library ### + +Google Mock uses the C++ Technical Report 1 (TR1) tuple library +heavily. Unfortunately TR1 tuple is not yet widely available with all +compilers. The good news is that Google Test 1.4.0+ implements a +subset of TR1 tuple that's enough for Google Mock's need. Google Mock +will automatically use that implementation when the compiler doesn't +provide TR1 tuple. + +Usually you don't need to care about which tuple library Google Test +and Google Mock use. However, if your project already uses TR1 tuple, +you need to tell Google Test and Google Mock to use the same TR1 tuple +library the rest of your project uses, or the two tuple +implementations will clash. To do that, add + + -DGTEST_USE_OWN_TR1_TUPLE=0 + +to the compiler flags while compiling Google Test, Google Mock, and +your tests. If you want to force Google Test and Google Mock to use +their own tuple library, just add + + -DGTEST_USE_OWN_TR1_TUPLE=1 + +to the compiler flags instead. + +If you want to use Boost's TR1 tuple library with Google Mock, please +refer to the Boost website (http://www.boost.org/) for how to obtain +it and set it up. + +### As a Shared Library (DLL) ### + +Google Mock is compact, so most users can build and link it as a static +library for the simplicity. Google Mock can be used as a DLL, but the +same DLL must contain Google Test as well. See +[Google Test's README][gtest_readme] +for instructions on how to set up necessary compiler settings. 
+ +### Tweaking Google Mock ### + +Most of Google Test's control macros apply to Google Mock as well. +Please see [Google Test's README][gtest_readme] for how to tweak them. + +### Upgrading from an Earlier Version ### + +We strive to keep Google Mock releases backward compatible. +Sometimes, though, we have to make some breaking changes for the +users' long-term benefits. This section describes what you'll need to +do if you are upgrading from an earlier version of Google Mock. + +#### Upgrading from 1.1.0 or Earlier #### + +You may need to explicitly enable or disable Google Test's own TR1 +tuple library. See the instructions in section "[Choosing a TR1 Tuple +Library](../googletest/#choosing-a-tr1-tuple-library)". + +#### Upgrading from 1.4.0 or Earlier #### + +On platforms where the pthread library is available, Google Test and +Google Mock use it in order to be thread-safe. For this to work, you +may need to tweak your compiler and/or linker flags. Please see the +"[Multi-threaded Tests](../googletest#multi-threaded-tests +)" section in Google Test's README for what you may need to do. + +If you have custom matchers defined using `MatcherInterface` or +`MakePolymorphicMatcher()`, you'll need to update their definitions to +use the new matcher API ( +[monomorphic](./docs/CookBook.md#writing-new-monomorphic-matchers), +[polymorphic](./docs/CookBook.md#writing-new-polymorphic-matchers)). +Matchers defined using `MATCHER()` or `MATCHER_P*()` aren't affected. + +### Developing Google Mock ### + +This section discusses how to make your own changes to Google Mock. + +#### Testing Google Mock Itself #### + +To make sure your changes work as intended and don't break existing +functionality, you'll want to compile and run Google Mock's own tests. +For that you'll need Autotools. First, make sure you have followed +the instructions above to configure Google Mock. +Then, create a build output directory and enter it.
Next, + + ${GMOCK_DIR}/configure # try --help for more info + +Once you have successfully configured Google Mock, the build steps are +standard for GNU-style OSS packages. + + make # Standard makefile following GNU conventions + make check # Builds and runs all tests - all should pass. + +Note that when building your project against Google Mock, you are building +against Google Test as well. There is no need to configure Google Test +separately. + +#### Contributing a Patch #### + +We welcome patches. +Please read the [Developer's Guide](docs/DevGuide.md) +for how you can contribute. In particular, make sure you have signed +the Contributor License Agreement, or we won't be able to accept the +patch. + +Happy testing! + +[gtest_readme]: ../googletest/README.md "googletest" +[gtest_cmakebuild]: ../googletest/README.md#using-cmake "Using CMake" +[gtest_incorpcmake]: ../googletest/README.md#incorporating-into-an-existing-cmake-project "Incorporating Into An Existing CMake Project" diff --git a/tools/external/googletest/googlemock/build-aux/.keep b/tools/external/googletest/googlemock/build-aux/.keep new file mode 100644 index 00000000..e69de29b diff --git a/tools/external/googletest/googlemock/cmake/gmock.pc.in b/tools/external/googletest/googlemock/cmake/gmock.pc.in new file mode 100644 index 00000000..c4416426 --- /dev/null +++ b/tools/external/googletest/googlemock/cmake/gmock.pc.in @@ -0,0 +1,9 @@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ + +Name: gmock +Description: GoogleMock (without main() function) +Version: @PROJECT_VERSION@ +URL: https://github.com/google/googletest +Libs: -L${libdir} -lgmock @CMAKE_THREAD_LIBS_INIT@ +Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@ @CMAKE_THREAD_LIBS_INIT@ diff --git a/tools/external/googletest/googlemock/cmake/gmock_main.pc.in b/tools/external/googletest/googlemock/cmake/gmock_main.pc.in new file mode 100644 index 00000000..c377dba1 --- /dev/null +++ 
b/tools/external/googletest/googlemock/cmake/gmock_main.pc.in @@ -0,0 +1,9 @@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ + +Name: gmock_main +Description: GoogleMock (with main() function) +Version: @PROJECT_VERSION@ +URL: https://github.com/google/googletest +Libs: -L${libdir} -lgmock_main @CMAKE_THREAD_LIBS_INIT@ +Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@ @CMAKE_THREAD_LIBS_INIT@ diff --git a/tools/external/googletest/googlemock/configure.ac b/tools/external/googletest/googlemock/configure.ac new file mode 100644 index 00000000..c23ed455 --- /dev/null +++ b/tools/external/googletest/googlemock/configure.ac @@ -0,0 +1,146 @@ +m4_include(../googletest/m4/acx_pthread.m4) + +AC_INIT([Google C++ Mocking Framework], + [1.8.0], + [googlemock@googlegroups.com], + [gmock]) + +# Provide various options to initialize the Autoconf and configure processes. +AC_PREREQ([2.59]) +AC_CONFIG_SRCDIR([./LICENSE]) +AC_CONFIG_AUX_DIR([build-aux]) +AC_CONFIG_HEADERS([build-aux/config.h]) +AC_CONFIG_FILES([Makefile]) +AC_CONFIG_FILES([scripts/gmock-config], [chmod +x scripts/gmock-config]) + +# Initialize Automake with various options. We require at least v1.9, prevent +# pedantic complaints about package files, and enable various distribution +# targets. +AM_INIT_AUTOMAKE([1.9 dist-bzip2 dist-zip foreign subdir-objects]) + +# Check for programs used in building Google Test. +AC_PROG_CC +AC_PROG_CXX +AC_LANG([C++]) +AC_PROG_LIBTOOL + +# TODO(chandlerc@google.com): Currently we aren't running the Python tests +# against the interpreter detected by AM_PATH_PYTHON, and so we condition +# HAVE_PYTHON by requiring "python" to be in the PATH, and that interpreter's +# version to be >= 2.3. This will allow the scripts to use a "/usr/bin/env" +# hashbang. 
+PYTHON= # We *do not* allow the user to specify a python interpreter +AC_PATH_PROG([PYTHON],[python],[:]) +AS_IF([test "$PYTHON" != ":"], + [AM_PYTHON_CHECK_VERSION([$PYTHON],[2.3],[:],[PYTHON=":"])]) +AM_CONDITIONAL([HAVE_PYTHON],[test "$PYTHON" != ":"]) + +# TODO(chandlerc@google.com) Check for the necessary system headers. + +# Configure pthreads. +AC_ARG_WITH([pthreads], + [AS_HELP_STRING([--with-pthreads], + [use pthreads (default is yes)])], + [with_pthreads=$withval], + [with_pthreads=check]) + +have_pthreads=no +AS_IF([test "x$with_pthreads" != "xno"], + [ACX_PTHREAD( + [], + [AS_IF([test "x$with_pthreads" != "xcheck"], + [AC_MSG_FAILURE( + [--with-pthreads was specified, but unable to be used])])]) + have_pthreads="$acx_pthread_ok"]) +AM_CONDITIONAL([HAVE_PTHREADS],[test "x$have_pthreads" = "xyes"]) +AC_SUBST(PTHREAD_CFLAGS) +AC_SUBST(PTHREAD_LIBS) + +# GoogleMock currently has hard dependencies upon GoogleTest above and beyond +# running its own test suite, so we both provide our own version in +# a subdirectory and provide some logic to use a custom version or a system +# installed version. +AC_ARG_WITH([gtest], + [AS_HELP_STRING([--with-gtest], + [Specifies how to find the gtest package. If no + arguments are given, the default behavior, a + system installed gtest will be used if present, + and an internal version built otherwise. If a + path is provided, the gtest built or installed at + that prefix will be used.])], + [], + [with_gtest=yes]) +AC_ARG_ENABLE([external-gtest], + [AS_HELP_STRING([--disable-external-gtest], + [Disables any detection or use of a system + installed or user provided gtest. Any option to + '--with-gtest' is ignored. (Default is enabled.)]) + ], [], [enable_external_gtest=yes]) +AS_IF([test "x$with_gtest" = "xno"], + [AC_MSG_ERROR([dnl +Support for GoogleTest was explicitly disabled.
Currently GoogleMock has a hard +dependency upon GoogleTest to build, please provide a version, or allow +GoogleMock to use any installed version and fall back upon its internal +version.])]) + +# Setup various GTEST variables. TODO(chandlerc@google.com): When these are +# used below, they should be used such that any pre-existing values always +# trump values we set them to, so that they can be used to selectively override +# details of the detection process. +AC_ARG_VAR([GTEST_CONFIG], + [The exact path of Google Test's 'gtest-config' script.]) +AC_ARG_VAR([GTEST_CPPFLAGS], + [C-like preprocessor flags for Google Test.]) +AC_ARG_VAR([GTEST_CXXFLAGS], + [C++ compile flags for Google Test.]) +AC_ARG_VAR([GTEST_LDFLAGS], + [Linker path and option flags for Google Test.]) +AC_ARG_VAR([GTEST_LIBS], + [Library linking flags for Google Test.]) +AC_ARG_VAR([GTEST_VERSION], + [The version of Google Test available.]) +HAVE_BUILT_GTEST="no" + +GTEST_MIN_VERSION="1.8.0" + +AS_IF([test "x${enable_external_gtest}" = "xyes"], + [# Begin filling in variables as we are able. 
+ AS_IF([test "x${with_gtest}" != "xyes"], + [AS_IF([test -x "${with_gtest}/scripts/gtest-config"], + [GTEST_CONFIG="${with_gtest}/scripts/gtest-config"], + [GTEST_CONFIG="${with_gtest}/bin/gtest-config"]) + AS_IF([test -x "${GTEST_CONFIG}"], [], + [AC_MSG_ERROR([dnl +Unable to locate either a built or installed Google Test at '${with_gtest}'.]) + ])]) + + AS_IF([test -x "${GTEST_CONFIG}"], [], + [AC_PATH_PROG([GTEST_CONFIG], [gtest-config])]) + AS_IF([test -x "${GTEST_CONFIG}"], + [AC_MSG_CHECKING([for Google Test version >= ${GTEST_MIN_VERSION}]) + AS_IF([${GTEST_CONFIG} --min-version=${GTEST_MIN_VERSION}], + [AC_MSG_RESULT([yes]) + HAVE_BUILT_GTEST="yes"], + [AC_MSG_RESULT([no])])])]) + +AS_IF([test "x${HAVE_BUILT_GTEST}" = "xyes"], + [GTEST_CPPFLAGS=`${GTEST_CONFIG} --cppflags` + GTEST_CXXFLAGS=`${GTEST_CONFIG} --cxxflags` + GTEST_LDFLAGS=`${GTEST_CONFIG} --ldflags` + GTEST_LIBS=`${GTEST_CONFIG} --libs` + GTEST_VERSION=`${GTEST_CONFIG} --version`], + [AC_CONFIG_SUBDIRS([../googletest]) + # GTEST_CONFIG needs to be executable both in a Makefile environment and + # in a shell script environment, so resolve an absolute path for it here. + GTEST_CONFIG="`pwd -P`/../googletest/scripts/gtest-config" + GTEST_CPPFLAGS='-I$(top_srcdir)/../googletest/include' + GTEST_CXXFLAGS='-g' + GTEST_LDFLAGS='' + GTEST_LIBS='$(top_builddir)/../googletest/lib/libgtest.la' + GTEST_VERSION="${GTEST_MIN_VERSION}"]) + +# TODO(chandlerc@google.com) Check the types, structures, and other compiler +# and architecture characteristics. + +# Output the generated files. No further autoconf macros may be used. +AC_OUTPUT diff --git a/tools/external/googletest/googlemock/docs/CheatSheet.md b/tools/external/googletest/googlemock/docs/CheatSheet.md new file mode 100644 index 00000000..c6367fdd --- /dev/null +++ b/tools/external/googletest/googlemock/docs/CheatSheet.md @@ -0,0 +1,562 @@ + + +# Defining a Mock Class # + +## Mocking a Normal Class ## + +Given +``` +class Foo { + ... 
+ virtual ~Foo(); + virtual int GetSize() const = 0; + virtual string Describe(const char* name) = 0; + virtual string Describe(int type) = 0; + virtual bool Process(Bar elem, int count) = 0; +}; +``` +(note that `~Foo()` **must** be virtual) we can define its mock as +``` +#include "gmock/gmock.h" + +class MockFoo : public Foo { + MOCK_CONST_METHOD0(GetSize, int()); + MOCK_METHOD1(Describe, string(const char* name)); + MOCK_METHOD1(Describe, string(int type)); + MOCK_METHOD2(Process, bool(Bar elem, int count)); +}; +``` + +To create a "nice" mock object which ignores all uninteresting calls, +or a "strict" mock object, which treats them as failures: +``` +NiceMock<MockFoo> nice_foo; // The type is a subclass of MockFoo. +StrictMock<MockFoo> strict_foo; // The type is a subclass of MockFoo. +``` + +## Mocking a Class Template ## + +To mock +``` +template <typename Elem> +class StackInterface { + public: + ... + virtual ~StackInterface(); + virtual int GetSize() const = 0; + virtual void Push(const Elem& x) = 0; +}; +``` +(note that `~StackInterface()` **must** be virtual) just append `_T` to the `MOCK_*` macros: +``` +template <typename Elem> +class MockStack : public StackInterface<Elem> { + public: + ... + MOCK_CONST_METHOD0_T(GetSize, int()); + MOCK_METHOD1_T(Push, void(const Elem& x)); +}; +``` + +## Specifying Calling Conventions for Mock Functions ## + +If your mock function doesn't use the default calling convention, you +can specify it by appending `_WITH_CALLTYPE` to any of the macros +described in the previous two sections and supplying the calling +convention as the first argument to the macro. For example, +``` + MOCK_METHOD1_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, bool(int n)); + MOCK_CONST_METHOD2_WITH_CALLTYPE(STDMETHODCALLTYPE, Bar, int(double x, double y)); +``` +where `STDMETHODCALLTYPE` is defined by `<objbase.h>` on Windows. + +# Using Mocks in Tests # + +The typical flow is: + 1. Import the Google Mock names you need to use.
All Google Mock names are in the `testing` namespace unless they are macros or otherwise noted. + 1. Create the mock objects. + 1. Optionally, set the default actions of the mock objects. + 1. Set your expectations on the mock objects (How will they be called? What will they do?). + 1. Exercise code that uses the mock objects; if necessary, check the result using [Google Test](../../googletest/) assertions. + 1. When a mock object is destructed, Google Mock automatically verifies that all expectations on it have been satisfied. + +Here is an example: +``` +using ::testing::Return; // #1 + +TEST(BarTest, DoesThis) { + MockFoo foo; // #2 + + ON_CALL(foo, GetSize()) // #3 + .WillByDefault(Return(1)); + // ... other default actions ... + + EXPECT_CALL(foo, Describe(5)) // #4 + .Times(3) + .WillRepeatedly(Return("Category 5")); + // ... other expectations ... + + EXPECT_EQ("good", MyProductionFunction(&foo)); // #5 +} // #6 +``` + +# Setting Default Actions # + +Google Mock has a **built-in default action** for any function that +returns `void`, `bool`, a numeric value, or a pointer. + +To customize the default action for functions with return type `T` globally: +``` +using ::testing::DefaultValue; + +// Sets the default value to be returned. T must be CopyConstructible. +DefaultValue<T>::Set(value); +// Sets a factory. Will be invoked on demand. T must be MoveConstructible. +// T MakeT(); +DefaultValue<T>::SetFactory(&MakeT); +// ... use the mocks ... +// Resets the default value. +DefaultValue<T>::Clear(); +``` + +To customize the default action for a particular method, use `ON_CALL()`: +``` +ON_CALL(mock_object, method(matchers)) + .With(multi_argument_matcher) ? + .WillByDefault(action); +``` + +# Setting Expectations # + +`EXPECT_CALL()` sets **expectations** on a mock method (How will it be +called? What will it do?): +``` +EXPECT_CALL(mock_object, method(matchers)) + .With(multi_argument_matcher) ? + .Times(cardinality) ?
+ .InSequence(sequences) * + .After(expectations) * + .WillOnce(action) * + .WillRepeatedly(action) ? + .RetiresOnSaturation(); ? +``` + +If `Times()` is omitted, the cardinality is assumed to be: + + * `Times(1)` when there is neither `WillOnce()` nor `WillRepeatedly()`; + * `Times(n)` when there are `n WillOnce()`s but no `WillRepeatedly()`, where `n` >= 1; or + * `Times(AtLeast(n))` when there are `n WillOnce()`s and a `WillRepeatedly()`, where `n` >= 0. + +A method with no `EXPECT_CALL()` is free to be invoked _any number of times_, and the default action will be taken each time. + +# Matchers # + +A **matcher** matches a _single_ argument. You can use it inside +`ON_CALL()` or `EXPECT_CALL()`, or use it to validate a value +directly: + +| `EXPECT_THAT(value, matcher)` | Asserts that `value` matches `matcher`. | +|:------------------------------|:----------------------------------------| +| `ASSERT_THAT(value, matcher)` | The same as `EXPECT_THAT(value, matcher)`, except that it generates a **fatal** failure. | + +Built-in matchers (where `argument` is the function argument) are +divided into several categories: + +## Wildcard ## +|`_`|`argument` can be any value of the correct type.| +|:--|:-----------------------------------------------| +|`A<type>()` or `An<type>()`|`argument` can be any value of type `type`. | + +## Generic Comparison ## + +|`Eq(value)` or `value`|`argument == value`| +|:---------------------|:------------------| +|`Ge(value)` |`argument >= value`| +|`Gt(value)` |`argument > value` | +|`Le(value)` |`argument <= value`| +|`Lt(value)` |`argument < value` | +|`Ne(value)` |`argument != value`| +|`IsNull()` |`argument` is a `NULL` pointer (raw or smart).| +|`NotNull()` |`argument` is a non-null pointer (raw or smart).| +|`Ref(variable)` |`argument` is a reference to `variable`.| +|`TypedEq<type>(value)`|`argument` has type `type` and is equal to `value`.
You may need to use this instead of `Eq(value)` when the mock function is overloaded.| + +Except `Ref()`, these matchers make a _copy_ of `value` in case it's +modified or destructed later. If the compiler complains that `value` +doesn't have a public copy constructor, try wrap it in `ByRef()`, +e.g. `Eq(ByRef(non_copyable_value))`. If you do that, make sure +`non_copyable_value` is not changed afterwards, or the meaning of your +matcher will be changed. + +## Floating-Point Matchers ## + +|`DoubleEq(a_double)`|`argument` is a `double` value approximately equal to `a_double`, treating two NaNs as unequal.| +|:-------------------|:----------------------------------------------------------------------------------------------| +|`FloatEq(a_float)` |`argument` is a `float` value approximately equal to `a_float`, treating two NaNs as unequal. | +|`NanSensitiveDoubleEq(a_double)`|`argument` is a `double` value approximately equal to `a_double`, treating two NaNs as equal. | +|`NanSensitiveFloatEq(a_float)`|`argument` is a `float` value approximately equal to `a_float`, treating two NaNs as equal. | + +The above matchers use ULP-based comparison (the same as used in +[Google Test](../../googletest/)). They +automatically pick a reasonable error bound based on the absolute +value of the expected value. `DoubleEq()` and `FloatEq()` conform to +the IEEE standard, which requires comparing two NaNs for equality to +return false. The `NanSensitive*` version instead treats two NaNs as +equal, which is often what a user wants. 
+ +|`DoubleNear(a_double, max_abs_error)`|`argument` is a `double` value close to `a_double` (absolute error <= `max_abs_error`), treating two NaNs as unequal.| +|:------------------------------------|:--------------------------------------------------------------------------------------------------------------------| +|`FloatNear(a_float, max_abs_error)` |`argument` is a `float` value close to `a_float` (absolute error <= `max_abs_error`), treating two NaNs as unequal. | +|`NanSensitiveDoubleNear(a_double, max_abs_error)`|`argument` is a `double` value close to `a_double` (absolute error <= `max_abs_error`), treating two NaNs as equal. | +|`NanSensitiveFloatNear(a_float, max_abs_error)`|`argument` is a `float` value close to `a_float` (absolute error <= `max_abs_error`), treating two NaNs as equal. | + +## String Matchers ## + +The `argument` can be either a C string or a C++ string object: + +|`ContainsRegex(string)`|`argument` matches the given regular expression.| +|:----------------------|:-----------------------------------------------| +|`EndsWith(suffix)` |`argument` ends with string `suffix`. | +|`HasSubstr(string)` |`argument` contains `string` as a sub-string. | +|`MatchesRegex(string)` |`argument` matches the given regular expression with the match starting at the first character and ending at the last character.| +|`StartsWith(prefix)` |`argument` starts with string `prefix`. | +|`StrCaseEq(string)` |`argument` is equal to `string`, ignoring case. | +|`StrCaseNe(string)` |`argument` is not equal to `string`, ignoring case.| +|`StrEq(string)` |`argument` is equal to `string`. | +|`StrNe(string)` |`argument` is not equal to `string`. | + +`ContainsRegex()` and `MatchesRegex()` use the regular expression +syntax defined +[here](../../googletest/docs/AdvancedGuide.md#regular-expression-syntax). +`StrCaseEq()`, `StrCaseNe()`, `StrEq()`, and `StrNe()` work for wide +strings as well. 
+ +## Container Matchers ## + +Most STL-style containers support `==`, so you can use +`Eq(expected_container)` or simply `expected_container` to match a +container exactly. If you want to write the elements in-line, +match them more flexibly, or get more informative messages, you can use: + +| `ContainerEq(container)` | The same as `Eq(container)` except that the failure message also includes which elements are in one container but not the other. | +|:-------------------------|:---------------------------------------------------------------------------------------------------------------------------------| +| `Contains(e)` | `argument` contains an element that matches `e`, which can be either a value or a matcher. | +| `Each(e)` | `argument` is a container where _every_ element matches `e`, which can be either a value or a matcher. | +| `ElementsAre(e0, e1, ..., en)` | `argument` has `n + 1` elements, where the i-th element matches `ei`, which can be a value or a matcher. 0 to 10 arguments are allowed. | +| `ElementsAreArray({ e0, e1, ..., en })`, `ElementsAreArray(array)`, or `ElementsAreArray(array, count)` | The same as `ElementsAre()` except that the expected element values/matchers come from an initializer list, STL-style container, or C-style array. | +| `IsEmpty()` | `argument` is an empty container (`container.empty()`). | +| `Pointwise(m, container)` | `argument` contains the same number of elements as in `container`, and for all i, (the i-th element in `argument`, the i-th element in `container`) match `m`, which is a matcher on 2-tuples. E.g. `Pointwise(Le(), upper_bounds)` verifies that each element in `argument` doesn't exceed the corresponding element in `upper_bounds`. See more detail below. | +| `SizeIs(m)` | `argument` is a container whose size matches `m`. E.g. `SizeIs(2)` or `SizeIs(Lt(2))`. 
| +| `UnorderedElementsAre(e0, e1, ..., en)` | `argument` has `n + 1` elements, and under some permutation each element matches an `ei` (for a different `i`), which can be a value or a matcher. 0 to 10 arguments are allowed. | +| `UnorderedElementsAreArray({ e0, e1, ..., en })`, `UnorderedElementsAreArray(array)`, or `UnorderedElementsAreArray(array, count)` | The same as `UnorderedElementsAre()` except that the expected element values/matchers come from an initializer list, STL-style container, or C-style array. | +| `WhenSorted(m)` | When `argument` is sorted using the `<` operator, it matches container matcher `m`. E.g. `WhenSorted(ElementsAre(1, 2, 3))` verifies that `argument` contains elements `1`, `2`, and `3`, ignoring order. | +| `WhenSortedBy(comparator, m)` | The same as `WhenSorted(m)`, except that the given comparator instead of `<` is used to sort `argument`. E.g. `WhenSortedBy(std::greater(), ElementsAre(3, 2, 1))`. | + +Notes: + + * These matchers can also match: + 1. a native array passed by reference (e.g. in `Foo(const int (&a)[5])`), and + 1. an array passed as a pointer and a count (e.g. in `Bar(const T* buffer, int len)` -- see [Multi-argument Matchers](#Multiargument_Matchers.md)). + * The array being matched may be multi-dimensional (i.e. its elements can be arrays). + * `m` in `Pointwise(m, ...)` should be a matcher for `::testing::tuple` where `T` and `U` are the element type of the actual container and the expected container, respectively. For example, to compare two `Foo` containers where `Foo` doesn't support `operator==` but has an `Equals()` method, one might write: + +``` +using ::testing::get; +MATCHER(FooEq, "") { + return get<0>(arg).Equals(get<1>(arg)); +} +... 
+EXPECT_THAT(actual_foos, Pointwise(FooEq(), expected_foos)); +``` + +## Member Matchers ## + +|`Field(&class::field, m)`|`argument.field` (or `argument->field` when `argument` is a plain pointer) matches matcher `m`, where `argument` is an object of type _class_.| +|:------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------| +|`Key(e)` |`argument.first` matches `e`, which can be either a value or a matcher. E.g. `Contains(Key(Le(5)))` can verify that a `map` contains a key `<= 5`.| +|`Pair(m1, m2)` |`argument` is an `std::pair` whose `first` field matches `m1` and `second` field matches `m2`. | +|`Property(&class::property, m)`|`argument.property()` (or `argument->property()` when `argument` is a plain pointer) matches matcher `m`, where `argument` is an object of type _class_.| + +## Matching the Result of a Function or Functor ## + +|`ResultOf(f, m)`|`f(argument)` matches matcher `m`, where `f` is a function or functor.| +|:---------------|:---------------------------------------------------------------------| + +## Pointer Matchers ## + +|`Pointee(m)`|`argument` (either a smart pointer or a raw pointer) points to a value that matches matcher `m`.| +|:-----------|:-----------------------------------------------------------------------------------------------| +|`WhenDynamicCastTo(m)`| when `argument` is passed through `dynamic_cast()`, it matches matcher `m`. | + +## Multiargument Matchers ## + +Technically, all matchers match a _single_ value. A "multi-argument" +matcher is just one that matches a _tuple_. 
The following matchers can +be used to match a tuple `(x, y)`: + +|`Eq()`|`x == y`| +|:-----|:-------| +|`Ge()`|`x >= y`| +|`Gt()`|`x > y` | +|`Le()`|`x <= y`| +|`Lt()`|`x < y` | +|`Ne()`|`x != y`| + +You can use the following selectors to pick a subset of the arguments +(or reorder them) to participate in the matching: + +|`AllArgs(m)`|Equivalent to `m`. Useful as syntactic sugar in `.With(AllArgs(m))`.| +|:-----------|:-------------------------------------------------------------------| +|`Args(m)`|The tuple of the `k` selected (using 0-based indices) arguments matches `m`, e.g. `Args<1, 2>(Eq())`.| + +## Composite Matchers ## + +You can make a matcher from one or more other matchers: + +|`AllOf(m1, m2, ..., mn)`|`argument` matches all of the matchers `m1` to `mn`.| +|:-----------------------|:---------------------------------------------------| +|`AnyOf(m1, m2, ..., mn)`|`argument` matches at least one of the matchers `m1` to `mn`.| +|`Not(m)` |`argument` doesn't match matcher `m`. | + +## Adapters for Matchers ## + +|`MatcherCast(m)`|casts matcher `m` to type `Matcher`.| +|:------------------|:--------------------------------------| +|`SafeMatcherCast(m)`| [safely casts](CookBook.md#casting-matchers) matcher `m` to type `Matcher`. | +|`Truly(predicate)` |`predicate(argument)` returns something considered by C++ to be true, where `predicate` is a function or functor.| + +## Matchers as Predicates ## + +|`Matches(m)(value)`|evaluates to `true` if `value` matches `m`. You can use `Matches(m)` alone as a unary functor.| +|:------------------|:---------------------------------------------------------------------------------------------| +|`ExplainMatchResult(m, value, result_listener)`|evaluates to `true` if `value` matches `m`, explaining the result to `result_listener`. | +|`Value(value, m)` |evaluates to `true` if `value` matches `m`. 
| + +## Defining Matchers ## + +| `MATCHER(IsEven, "") { return (arg % 2) == 0; }` | Defines a matcher `IsEven()` to match an even number. | +|:-------------------------------------------------|:------------------------------------------------------| +| `MATCHER_P(IsDivisibleBy, n, "") { *result_listener << "where the remainder is " << (arg % n); return (arg % n) == 0; }` | Defines a matcher `IsDivisibleBy(n)` to match a number divisible by `n`. | +| `MATCHER_P2(IsBetween, a, b, std::string(negation ? "isn't" : "is") + " between " + PrintToString(a) + " and " + PrintToString(b)) { return a <= arg && arg <= b; }` | Defines a matcher `IsBetween(a, b)` to match a value in the range [`a`, `b`]. | + +**Notes:** + + 1. The `MATCHER*` macros cannot be used inside a function or class. + 1. The matcher body must be _purely functional_ (i.e. it cannot have any side effect, and the result must not depend on anything other than the value being matched and the matcher parameters). + 1. You can use `PrintToString(x)` to convert a value `x` of any type to a string. + +## Matchers as Test Assertions ## + +|`ASSERT_THAT(expression, m)`|Generates a [fatal failure](../../googletest/docs/Primer.md#assertions) if the value of `expression` doesn't match matcher `m`.| +|:---------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------| +|`EXPECT_THAT(expression, m)`|Generates a non-fatal failure if the value of `expression` doesn't match matcher `m`. | + +# Actions # + +**Actions** specify what a mock function should do when invoked. + +## Returning a Value ## + +|`Return()`|Return from a `void` mock function.| +|:---------|:----------------------------------| +|`Return(value)`|Return `value`.
If the type of `value` is different to the mock function's return type, `value` is converted to the latter type at the time the expectation is set, not when the action is executed.| +|`ReturnArg<N>()`|Return the `N`-th (0-based) argument.| +|`ReturnNew<T>(a1, ..., ak)`|Return `new T(a1, ..., ak)`; a different object is created each time.| +|`ReturnNull()`|Return a null pointer. | +|`ReturnPointee(ptr)`|Return the value pointed to by `ptr`.| +|`ReturnRef(variable)`|Return a reference to `variable`. | +|`ReturnRefOfCopy(value)`|Return a reference to a copy of `value`; the copy lives as long as the action.| + +## Side Effects ## + +|`Assign(&variable, value)`|Assign `value` to variable.| +|:-------------------------|:--------------------------| +| `DeleteArg<N>()` | Delete the `N`-th (0-based) argument, which must be a pointer. | +| `SaveArg<N>(pointer)` | Save the `N`-th (0-based) argument to `*pointer`. | +| `SaveArgPointee<N>(pointer)` | Save the value pointed to by the `N`-th (0-based) argument to `*pointer`. | +| `SetArgReferee<N>(value)` | Assign value to the variable referenced by the `N`-th (0-based) argument. | +|`SetArgPointee<N>(value)` |Assign `value` to the variable pointed to by the `N`-th (0-based) argument.| +|`SetArgumentPointee<N>(value)`|Same as `SetArgPointee<N>(value)`. Deprecated. Will be removed in v1.7.0.| +|`SetArrayArgument<N>(first, last)`|Copies the elements in source range [`first`, `last`) to the array pointed to by the `N`-th (0-based) argument, which can be either a pointer or an iterator. The action does not take ownership of the elements in the source range.| +|`SetErrnoAndReturn(error, value)`|Set `errno` to `error` and return `value`.| +|`Throw(exception)` |Throws the given exception, which can be any copyable value.
Available since v1.1.0.| + +## Using a Function or a Functor as an Action ## + +|`Invoke(f)`|Invoke `f` with the arguments passed to the mock function, where `f` can be a global/static function or a functor.| +|:----------|:-----------------------------------------------------------------------------------------------------------------| +|`Invoke(object_pointer, &class::method)`|Invoke the method on the object with the arguments passed to the mock function. | +|`InvokeWithoutArgs(f)`|Invoke `f`, which can be a global/static function or a functor. `f` must take no arguments. | +|`InvokeWithoutArgs(object_pointer, &class::method)`|Invoke the method on the object, which takes no arguments. | +|`InvokeArgument<N>(arg1, arg2, ..., argk)`|Invoke the mock function's `N`-th (0-based) argument, which must be a function or a functor, with the `k` arguments.| + +The return value of the invoked function is used as the return value +of the action. + +When defining a function or functor to be used with `Invoke*()`, you can declare any unused parameters as `Unused`: +``` + double Distance(Unused, double x, double y) { return sqrt(x*x + y*y); } + ... + EXPECT_CALL(mock, Foo("Hi", _, _)).WillOnce(Invoke(Distance)); +``` + +In `InvokeArgument<N>(...)`, if an argument needs to be passed by reference, wrap it inside `ByRef()`. For example, +``` + InvokeArgument<2>(5, string("Hi"), ByRef(foo)) +``` +calls the mock function's #2 argument, passing to it `5` and `string("Hi")` by value, and `foo` by reference. + +## Default Action ## + +|`DoDefault()`|Do the default action (specified by `ON_CALL()` or the built-in one).| +|:------------|:--------------------------------------------------------------------| + +**Note:** due to technical reasons, `DoDefault()` cannot be used inside a composite action - trying to do so will result in a run-time error. + +## Composite Actions ## + +|`DoAll(a1, a2, ..., an)`|Do all actions `a1` to `an` and return the result of `an` in each invocation.
The first `n - 1` sub-actions must return void. | +|:-----------------------|:-----------------------------------------------------------------------------------------------------------------------------| +|`IgnoreResult(a)` |Perform action `a` and ignore its result. `a` must not return void. | +|`WithArg<N>(a)` |Pass the `N`-th (0-based) argument of the mock function to action `a` and perform it. | +|`WithArgs<N1, N2, ..., Nk>(a)`|Pass the selected (0-based) arguments of the mock function to action `a` and perform it. | +|`WithoutArgs(a)` |Perform action `a` without any arguments. | + +## Defining Actions ## + +| `ACTION(Sum) { return arg0 + arg1; }` | Defines an action `Sum()` to return the sum of the mock function's argument #0 and #1. | +|:--------------------------------------|:---------------------------------------------------------------------------------------| +| `ACTION_P(Plus, n) { return arg0 + n; }` | Defines an action `Plus(n)` to return the sum of the mock function's argument #0 and `n`. | +| `ACTION_Pk(Foo, p1, ..., pk) { statements; }` | Defines a parameterized action `Foo(p1, ..., pk)` to execute the given `statements`. | + +The `ACTION*` macros cannot be used inside a function or class. + +# Cardinalities # + +These are used in `Times()` to specify how many times a mock function will be called: + +|`AnyNumber()`|The function can be called any number of times.| +|:------------|:----------------------------------------------| +|`AtLeast(n)` |The call is expected at least `n` times. | +|`AtMost(n)` |The call is expected at most `n` times. | +|`Between(m, n)`|The call is expected between `m` and `n` (inclusive) times.| +|`Exactly(n) or n`|The call is expected exactly `n` times. In particular, the call should never happen when `n` is 0.| + +# Expectation Order # + +By default, the expectations can be matched in _any_ order. If some +or all expectations must be matched in a given order, there are two +ways to specify it.
They can be used either independently or +together. + +## The After Clause ## + +``` +using ::testing::Expectation; +... +Expectation init_x = EXPECT_CALL(foo, InitX()); +Expectation init_y = EXPECT_CALL(foo, InitY()); +EXPECT_CALL(foo, Bar()) + .After(init_x, init_y); +``` +says that `Bar()` can be called only after both `InitX()` and +`InitY()` have been called. + +If you don't know how many pre-requisites an expectation has when you +write it, you can use an `ExpectationSet` to collect them: + +``` +using ::testing::ExpectationSet; +... +ExpectationSet all_inits; +for (int i = 0; i < element_count; i++) { + all_inits += EXPECT_CALL(foo, InitElement(i)); +} +EXPECT_CALL(foo, Bar()) + .After(all_inits); +``` +says that `Bar()` can be called only after all elements have been +initialized (but we don't care about which elements get initialized +before the others). + +Modifying an `ExpectationSet` after using it in an `.After()` doesn't +affect the meaning of the `.After()`. + +## Sequences ## + +When you have a long chain of sequential expectations, it's easier to +specify the order using **sequences**, which don't require you to give +each expectation in the chain a different name. All expected
    +calls
    in the same sequence must occur in the order they are +specified. + +``` +using ::testing::Sequence; +Sequence s1, s2; +... +EXPECT_CALL(foo, Reset()) + .InSequence(s1, s2) + .WillOnce(Return(true)); +EXPECT_CALL(foo, GetSize()) + .InSequence(s1) + .WillOnce(Return(1)); +EXPECT_CALL(foo, Describe(A<const char*>())) + .InSequence(s2) + .WillOnce(Return("dummy")); +``` +says that `Reset()` must be called before _both_ `GetSize()` _and_ +`Describe()`, and the latter two can occur in any order. + +To put many expectations in a sequence conveniently: +``` +using ::testing::InSequence; +{ + InSequence dummy; + + EXPECT_CALL(...)...; + EXPECT_CALL(...)...; + ... + EXPECT_CALL(...)...; +} +``` +says that all expected calls in the scope of `dummy` must occur in +strict order. The name `dummy` is irrelevant. + +# Verifying and Resetting a Mock # + +Google Mock will verify the expectations on a mock object when it is destructed, or you can do it earlier: +``` +using ::testing::Mock; +... +// Verifies and removes the expectations on mock_obj; +// returns true iff successful. +Mock::VerifyAndClearExpectations(&mock_obj); +... +// Verifies and removes the expectations on mock_obj; +// also removes the default actions set by ON_CALL(); +// returns true iff successful. +Mock::VerifyAndClear(&mock_obj); +``` + +You can also tell Google Mock that a mock object can be leaked and doesn't +need to be verified: +``` +Mock::AllowLeak(&mock_obj); +``` + +# Mock Classes # + +Google Mock defines a convenient mock class template +``` +class MockFunction<R(A1, ..., An)> { + public: + MOCK_METHODn(Call, R(A1, ..., An)); +}; +``` +See this [recipe](CookBook.md#using-check-points) for one application of it. + +# Flags # + +| `--gmock_catch_leaked_mocks=0` | Don't report leaked mock objects as failures. | +|:-------------------------------|:----------------------------------------------| +| `--gmock_verbose=LEVEL` | Sets the default verbosity level (`info`, `warning`, or `error`) of Google Mock messages.
| diff --git a/tools/external/googletest/googlemock/docs/CookBook.md b/tools/external/googletest/googlemock/docs/CookBook.md new file mode 100644 index 00000000..3d07e68b --- /dev/null +++ b/tools/external/googletest/googlemock/docs/CookBook.md @@ -0,0 +1,3679 @@ + + +You can find recipes for using Google Mock here. If you haven't yet, +please read the [ForDummies](ForDummies.md) document first to make sure you understand +the basics. + +**Note:** Google Mock lives in the `testing` name space. For +readability, it is recommended to write `using ::testing::Foo;` once in +your file before using the name `Foo` defined by Google Mock. We omit +such `using` statements in this page for brevity, but you should do it +in your own code. + +# Creating Mock Classes # + +## Mocking Private or Protected Methods ## + +You must always put a mock method definition (`MOCK_METHOD*`) in a +`public:` section of the mock class, regardless of the method being +mocked being `public`, `protected`, or `private` in the base class. +This allows `ON_CALL` and `EXPECT_CALL` to reference the mock function +from outside of the mock class. (Yes, C++ allows a subclass to specify +a different access level than the base class on a virtual function.) +Example: + +``` +class Foo { + public: + ... + virtual bool Transform(Gadget* g) = 0; + + protected: + virtual void Resume(); + + private: + virtual int GetTimeOut(); +}; + +class MockFoo : public Foo { + public: + ... + MOCK_METHOD1(Transform, bool(Gadget* g)); + + // The following must be in the public section, even though the + // methods are protected or private in the base class. + MOCK_METHOD0(Resume, void()); + MOCK_METHOD0(GetTimeOut, int()); +}; +``` + +## Mocking Overloaded Methods ## + +You can mock overloaded functions as usual. No special attention is required: + +``` +class Foo { + ... + + // Must be virtual as we'll inherit from Foo. + virtual ~Foo(); + + // Overloaded on the types and/or numbers of arguments. 
+ virtual int Add(Element x); + virtual int Add(int times, Element x); + + // Overloaded on the const-ness of this object. + virtual Bar& GetBar(); + virtual const Bar& GetBar() const; +}; + +class MockFoo : public Foo { + ... + MOCK_METHOD1(Add, int(Element x)); + MOCK_METHOD2(Add, int(int times, Element x)); + + MOCK_METHOD0(GetBar, Bar&()); + MOCK_CONST_METHOD0(GetBar, const Bar&()); +}; +``` + +**Note:** if you don't mock all versions of the overloaded method, the +compiler will give you a warning about some methods in the base class +being hidden. To fix that, use `using` to bring them in scope: + +``` +class MockFoo : public Foo { + ... + using Foo::Add; + MOCK_METHOD1(Add, int(Element x)); + // We don't want to mock int Add(int times, Element x); + ... +}; +``` + +## Mocking Class Templates ## + +To mock a class template, append `_T` to the `MOCK_*` macros: + +``` +template <typename Elem> +class StackInterface { + ... + // Must be virtual as we'll inherit from StackInterface. + virtual ~StackInterface(); + + virtual int GetSize() const = 0; + virtual void Push(const Elem& x) = 0; +}; + +template <typename Elem> +class MockStack : public StackInterface<Elem> { + ... + MOCK_CONST_METHOD0_T(GetSize, int()); + MOCK_METHOD1_T(Push, void(const Elem& x)); +}; +``` + +## Mocking Nonvirtual Methods ## + +Google Mock can mock non-virtual functions to be used in what we call _hi-perf +dependency injection_. + +In this case, instead of sharing a common base class with the real +class, your mock class will be _unrelated_ to the real class, but +contain methods with the same signatures. The syntax for mocking +non-virtual methods is the _same_ as mocking virtual methods: + +``` +// A simple packet stream class. None of its members is virtual. +class ConcretePacketStream { + public: + void AppendPacket(Packet* new_packet); + const Packet* GetPacket(size_t packet_number) const; + size_t NumberOfPackets() const; + ... +}; + +// A mock packet stream class.
It inherits from no other, but defines +// GetPacket() and NumberOfPackets(). +class MockPacketStream { + public: + MOCK_CONST_METHOD1(GetPacket, const Packet*(size_t packet_number)); + MOCK_CONST_METHOD0(NumberOfPackets, size_t()); + ... +}; +``` + +Note that the mock class doesn't define `AppendPacket()`, unlike the +real class. That's fine as long as the test doesn't need to call it. + +Next, you need a way to say that you want to use +`ConcretePacketStream` in production code and to use `MockPacketStream` +in tests. Since the functions are not virtual and the two classes are +unrelated, you must specify your choice at _compile time_ (as opposed +to run time). + +One way to do it is to templatize your code that needs to use a packet +stream. More specifically, you will give your code a template type +argument for the type of the packet stream. In production, you will +instantiate your template with `ConcretePacketStream` as the type +argument. In tests, you will instantiate the same template with +`MockPacketStream`. For example, you may write: + +``` +template +void CreateConnection(PacketStream* stream) { ... } + +template +class PacketReader { + public: + void ReadPackets(PacketStream* stream, size_t packet_num); +}; +``` + +Then you can use `CreateConnection()` and +`PacketReader` in production code, and use +`CreateConnection()` and +`PacketReader` in tests. + +``` + MockPacketStream mock_stream; + EXPECT_CALL(mock_stream, ...)...; + .. set more expectations on mock_stream ... + PacketReader reader(&mock_stream); + ... exercise reader ... +``` + +## Mocking Free Functions ## + +It's possible to use Google Mock to mock a free function (i.e. a +C-style function or a static method). You just need to rewrite your +code to use an interface (abstract class). + +Instead of calling a free function (say, `OpenFile`) directly, +introduce an interface for it and have a concrete subclass that calls +the free function: + +``` +class FileInterface { + public: + ... 
+ virtual bool Open(const char* path, const char* mode) = 0; +}; + +class File : public FileInterface { + public: + ... + virtual bool Open(const char* path, const char* mode) { + return OpenFile(path, mode); + } +}; +``` + +Your code should talk to `FileInterface` to open a file. Now it's +easy to mock out the function. + +This may seem much hassle, but in practice you often have multiple +related functions that you can put in the same interface, so the +per-function syntactic overhead will be much lower. + +If you are concerned about the performance overhead incurred by +virtual functions, and profiling confirms your concern, you can +combine this with the recipe for [mocking non-virtual methods](#mocking-nonvirtual-methods). + +## The Nice, the Strict, and the Naggy ## + +If a mock method has no `EXPECT_CALL` spec but is called, Google Mock +will print a warning about the "uninteresting call". The rationale is: + + * New methods may be added to an interface after a test is written. We shouldn't fail a test just because a method it doesn't know about is called. + * However, this may also mean there's a bug in the test, so Google Mock shouldn't be silent either. If the user believes these calls are harmless, they can add an `EXPECT_CALL()` to suppress the warning. + +However, sometimes you may want to suppress all "uninteresting call" +warnings, while sometimes you may want the opposite, i.e. to treat all +of them as errors. Google Mock lets you make the decision on a +per-mock-object basis. + +Suppose your test uses a mock class `MockFoo`: + +``` +TEST(...) { + MockFoo mock_foo; + EXPECT_CALL(mock_foo, DoThis()); + ... code that uses mock_foo ... +} +``` + +If a method of `mock_foo` other than `DoThis()` is called, it will be +reported by Google Mock as a warning. However, if you rewrite your +test to use `NiceMock` instead, the warning will be gone, +resulting in a cleaner test output: + +``` +using ::testing::NiceMock; + +TEST(...) 
{ + NiceMock mock_foo; + EXPECT_CALL(mock_foo, DoThis()); + ... code that uses mock_foo ... +} +``` + +`NiceMock` is a subclass of `MockFoo`, so it can be used +wherever `MockFoo` is accepted. + +It also works if `MockFoo`'s constructor takes some arguments, as +`NiceMock` "inherits" `MockFoo`'s constructors: + +``` +using ::testing::NiceMock; + +TEST(...) { + NiceMock mock_foo(5, "hi"); // Calls MockFoo(5, "hi"). + EXPECT_CALL(mock_foo, DoThis()); + ... code that uses mock_foo ... +} +``` + +The usage of `StrictMock` is similar, except that it makes all +uninteresting calls failures: + +``` +using ::testing::StrictMock; + +TEST(...) { + StrictMock mock_foo; + EXPECT_CALL(mock_foo, DoThis()); + ... code that uses mock_foo ... + + // The test will fail if a method of mock_foo other than DoThis() + // is called. +} +``` + +There are some caveats though (I don't like them just as much as the +next guy, but sadly they are side effects of C++'s limitations): + + 1. `NiceMock` and `StrictMock` only work for mock methods defined using the `MOCK_METHOD*` family of macros **directly** in the `MockFoo` class. If a mock method is defined in a **base class** of `MockFoo`, the "nice" or "strict" modifier may not affect it, depending on the compiler. In particular, nesting `NiceMock` and `StrictMock` (e.g. `NiceMock >`) is **not** supported. + 1. The constructors of the base mock (`MockFoo`) cannot have arguments passed by non-const reference, which happens to be banned by the [Google C++ style guide](https://google.github.io/styleguide/cppguide.html). + 1. During the constructor or destructor of `MockFoo`, the mock object is _not_ nice or strict. This may cause surprises if the constructor or destructor calls a mock method on `this` object. (This behavior, however, is consistent with C++'s general rule: if a constructor or destructor calls a virtual method of `this` object, that method is treated as non-virtual. 
In other words, to the base class's constructor or destructor, `this` object behaves like an instance of the base class, not the derived class. This rule is required for safety. Otherwise a base constructor may use members of a derived class before they are initialized, or a base destructor may use members of a derived class after they have been destroyed.) + +Finally, you should be **very cautious** about when to use naggy or strict mocks, as they tend to make tests more brittle and harder to maintain. When you refactor your code without changing its externally visible behavior, ideally you shouldn't need to update any tests. If your code interacts with a naggy mock, however, you may start to get spammed with warnings as the result of your change. Worse, if your code interacts with a strict mock, your tests may start to fail and you'll be forced to fix them. Our general recommendation is to use nice mocks (not yet the default) most of the time, use naggy mocks (the current default) when developing or debugging tests, and use strict mocks only as the last resort. + +## Simplifying the Interface without Breaking Existing Code ## + +Sometimes a method has a long list of arguments that is mostly +uninteresting. For example, + +``` +class LogSink { + public: + ... + virtual void send(LogSeverity severity, const char* full_filename, + const char* base_filename, int line, + const struct tm* tm_time, + const char* message, size_t message_len) = 0; +}; +``` + +This method's argument list is lengthy and hard to work with (let's +say that the `message` argument is not even 0-terminated). If we mock +it as is, using the mock will be awkward. If, however, we try to +simplify this interface, we'll need to fix all clients depending on +it, which is often infeasible. + +The trick is to re-dispatch the method in the mock class: + +``` +class ScopedMockLog : public LogSink { + public: + ...
+ virtual void send(LogSeverity severity, const char* full_filename, + const char* base_filename, int line, const tm* tm_time, + const char* message, size_t message_len) { + // We are only interested in the log severity, full file name, and + // log message. + Log(severity, full_filename, std::string(message, message_len)); + } + + // Implements the mock method: + // + // void Log(LogSeverity severity, + // const string& file_path, + // const string& message); + MOCK_METHOD3(Log, void(LogSeverity severity, const string& file_path, + const string& message)); +}; +``` + +By defining a new mock method with a trimmed argument list, we make +the mock class much more user-friendly. + +## Alternative to Mocking Concrete Classes ## + +Often you may find yourself using classes that don't implement +interfaces. In order to test your code that uses such a class (let's +call it `Concrete`), you may be tempted to make the methods of +`Concrete` virtual and then mock it. + +Try not to do that. + +Making a non-virtual function virtual is a big decision. It creates an +extension point where subclasses can tweak your class' behavior. This +weakens your control on the class because now it's harder to maintain +the class' invariants. You should make a function virtual only when +there is a valid reason for a subclass to override it. + +Mocking concrete classes directly is problematic as it creates a tight +coupling between the class and the tests - any small change in the +class may invalidate your tests and make test maintenance a pain. + +To avoid such problems, many programmers have been practicing "coding +to interfaces": instead of talking to the `Concrete` class, your code +would define an interface and talk to it. Then you implement that +interface as an adaptor on top of `Concrete`. In tests, you can easily +mock that interface to observe how your code is doing. + +This technique incurs some overhead: + + * You pay the cost of virtual function calls (usually not a problem). 
+ * There is more abstraction for the programmers to learn. + +However, it can also bring significant benefits in addition to better +testability: + + * `Concrete`'s API may not fit your problem domain very well, as you may not be the only client it tries to serve. By designing your own interface, you have a chance to tailor it to your needs - you may add higher-level functionalities, rename stuff, etc., instead of just trimming the class. This allows you to write your code (user of the interface) in a more natural way, which means it will be more readable, more maintainable, and you'll be more productive. + * If `Concrete`'s implementation ever has to change, you don't have to rewrite everywhere it is used. Instead, you can absorb the change in your implementation of the interface, and your other code and tests will be insulated from this change. + +Some people worry that if everyone is practicing this technique, they +will end up writing lots of redundant code. This concern is totally +understandable. However, there are two reasons why it may not be the +case: + + * Different projects may need to use `Concrete` in different ways, so the best interfaces for them will be different. Therefore, each of them will have its own domain-specific interface on top of `Concrete`, and they will not be the same code. + * If enough projects want to use the same interface, they can always share it, just like they have been sharing `Concrete`. You can check in the interface and the adaptor somewhere near `Concrete` (perhaps in a `contrib` sub-directory) and let many projects use it. + +You need to weigh the pros and cons carefully for your particular +problem, but I'd like to assure you that the Java community has been +practicing this for a long time and it's a proven effective technique +applicable in a wide variety of situations. :-) + +## Delegating Calls to a Fake ## + +Sometimes you have a non-trivial fake implementation of an +interface.
For example: + +``` +class Foo { + public: + virtual ~Foo() {} + virtual char DoThis(int n) = 0; + virtual void DoThat(const char* s, int* p) = 0; +}; + +class FakeFoo : public Foo { + public: + virtual char DoThis(int n) { + return (n > 0) ? '+' : + (n < 0) ? '-' : '0'; + } + + virtual void DoThat(const char* s, int* p) { + *p = strlen(s); + } +}; +``` + +Now you want to mock this interface such that you can set expectations +on it. However, you also want to use `FakeFoo` for the default +behavior, as duplicating it in the mock object is, well, a lot of +work. + +When you define the mock class using Google Mock, you can have it +delegate its default action to a fake class you already have, using +this pattern: + +``` +using ::testing::_; +using ::testing::Invoke; + +class MockFoo : public Foo { + public: + // Normal mock method definitions using Google Mock. + MOCK_METHOD1(DoThis, char(int n)); + MOCK_METHOD2(DoThat, void(const char* s, int* p)); + + // Delegates the default actions of the methods to a FakeFoo object. + // This must be called *before* the custom ON_CALL() statements. + void DelegateToFake() { + ON_CALL(*this, DoThis(_)) + .WillByDefault(Invoke(&fake_, &FakeFoo::DoThis)); + ON_CALL(*this, DoThat(_, _)) + .WillByDefault(Invoke(&fake_, &FakeFoo::DoThat)); + } + private: + FakeFoo fake_; // Keeps an instance of the fake in the mock. +}; +``` + +With that, you can use `MockFoo` in your tests as usual. Just remember +that if you don't explicitly set an action in an `ON_CALL()` or +`EXPECT_CALL()`, the fake will be called upon to do it: + +``` +using ::testing::_; + +TEST(AbcTest, Xyz) { + MockFoo foo; + foo.DelegateToFake(); // Enables the fake for delegation. + + // Put your ON_CALL(foo, ...)s here, if any. + + // No action specified, meaning to use the default action. + EXPECT_CALL(foo, DoThis(5)); + EXPECT_CALL(foo, DoThat(_, _)); + + int n = 0; + EXPECT_EQ('+', foo.DoThis(5)); // FakeFoo::DoThis() is invoked. 
+ foo.DoThat("Hi", &n); // FakeFoo::DoThat() is invoked. + EXPECT_EQ(2, n); +} +``` + +**Some tips:** + + * If you want, you can still override the default action by providing your own `ON_CALL()` or using `.WillOnce()` / `.WillRepeatedly()` in `EXPECT_CALL()`. + * In `DelegateToFake()`, you only need to delegate the methods whose fake implementation you intend to use. + * The general technique discussed here works for overloaded methods, but you'll need to tell the compiler which version you mean. To disambiguate a mock function (the one you specify inside the parentheses of `ON_CALL()`), see the "Selecting Between Overloaded Functions" section on this page; to disambiguate a fake function (the one you place inside `Invoke()`), use a `static_cast` to specify the function's type. For instance, if class `Foo` has methods `char DoThis(int n)` and `bool DoThis(double x) const`, and you want to invoke the latter, you need to write `Invoke(&fake_, static_cast<bool (FakeFoo::*)(double) const>(&FakeFoo::DoThis))` instead of `Invoke(&fake_, &FakeFoo::DoThis)` (The strange-looking thing inside the angle brackets of `static_cast` is the type of a function pointer to the second `DoThis()` method.). + * Having to mix a mock and a fake is often a sign of something gone wrong. Perhaps you haven't got used to the interaction-based way of testing yet. Or perhaps your interface is taking on too many roles and should be split up. Therefore, **don't abuse this**. We would only recommend doing it as an intermediate step when you are refactoring your code. + +Regarding the tip on mixing a mock and a fake, here's an example on +why it may be a bad sign: Suppose you have a class `System` for +low-level system operations. In particular, it does file and I/O +operations. And suppose you want to test how your code uses `System` +to do I/O, and you just want the file operations to work normally.
If +you mock out the entire `System` class, you'll have to provide a fake +implementation for the file operation part, which suggests that +`System` is taking on too many roles. + +Instead, you can define a `FileOps` interface and an `IOOps` interface +and split `System`'s functionalities into the two. Then you can mock +`IOOps` without mocking `FileOps`. + +## Delegating Calls to a Real Object ## + +When using testing doubles (mocks, fakes, stubs, and etc), sometimes +their behaviors will differ from those of the real objects. This +difference could be either intentional (as in simulating an error such +that you can test the error handling code) or unintentional. If your +mocks have different behaviors than the real objects by mistake, you +could end up with code that passes the tests but fails in production. + +You can use the _delegating-to-real_ technique to ensure that your +mock has the same behavior as the real object while retaining the +ability to validate calls. This technique is very similar to the +delegating-to-fake technique, the difference being that we use a real +object instead of a fake. Here's an example: + +``` +using ::testing::_; +using ::testing::AtLeast; +using ::testing::Invoke; + +class MockFoo : public Foo { + public: + MockFoo() { + // By default, all calls are delegated to the real object. + ON_CALL(*this, DoThis()) + .WillByDefault(Invoke(&real_, &Foo::DoThis)); + ON_CALL(*this, DoThat(_)) + .WillByDefault(Invoke(&real_, &Foo::DoThat)); + ... + } + MOCK_METHOD0(DoThis, ...); + MOCK_METHOD1(DoThat, ...); + ... + private: + Foo real_; +}; +... + + MockFoo mock; + + EXPECT_CALL(mock, DoThis()) + .Times(3); + EXPECT_CALL(mock, DoThat("Hi")) + .Times(AtLeast(1)); + ... use mock in test ... 
+``` + +With this, Google Mock will verify that your code made the right calls +(with the right arguments, in the right order, called the right number +of times, etc), and a real object will answer the calls (so the +behavior will be the same as in production). This gives you the best +of both worlds. + +## Delegating Calls to a Parent Class ## + +Ideally, you should code to interfaces, whose methods are all pure +virtual. In reality, sometimes you do need to mock a virtual method +that is not pure (i.e, it already has an implementation). For example: + +``` +class Foo { + public: + virtual ~Foo(); + + virtual void Pure(int n) = 0; + virtual int Concrete(const char* str) { ... } +}; + +class MockFoo : public Foo { + public: + // Mocking a pure method. + MOCK_METHOD1(Pure, void(int n)); + // Mocking a concrete method. Foo::Concrete() is shadowed. + MOCK_METHOD1(Concrete, int(const char* str)); +}; +``` + +Sometimes you may want to call `Foo::Concrete()` instead of +`MockFoo::Concrete()`. Perhaps you want to do it as part of a stub +action, or perhaps your test doesn't need to mock `Concrete()` at all +(but it would be oh-so painful to have to define a new mock class +whenever you don't need to mock one of its methods). + +The trick is to leave a back door in your mock class for accessing the +real methods in the base class: + +``` +class MockFoo : public Foo { + public: + // Mocking a pure method. + MOCK_METHOD1(Pure, void(int n)); + // Mocking a concrete method. Foo::Concrete() is shadowed. + MOCK_METHOD1(Concrete, int(const char* str)); + + // Use this to call Concrete() defined in Foo. + int FooConcrete(const char* str) { return Foo::Concrete(str); } +}; +``` + +Now, you can call `Foo::Concrete()` inside an action by: + +``` +using ::testing::_; +using ::testing::Invoke; +... 
+ EXPECT_CALL(foo, Concrete(_)) + .WillOnce(Invoke(&foo, &MockFoo::FooConcrete)); +``` + +or tell the mock object that you don't want to mock `Concrete()`: + +``` +using ::testing::Invoke; +... + ON_CALL(foo, Concrete(_)) + .WillByDefault(Invoke(&foo, &MockFoo::FooConcrete)); +``` + +(Why don't we just write `Invoke(&foo, &Foo::Concrete)`? If you do +that, `MockFoo::Concrete()` will be called (and cause an infinite +recursion) since `Foo::Concrete()` is virtual. That's just how C++ +works.) + +# Using Matchers # + +## Matching Argument Values Exactly ## + +You can specify exactly which arguments a mock method is expecting: + +``` +using ::testing::Return; +... + EXPECT_CALL(foo, DoThis(5)) + .WillOnce(Return('a')); + EXPECT_CALL(foo, DoThat("Hello", bar)); +``` + +## Using Simple Matchers ## + +You can use matchers to match arguments that have a certain property: + +``` +using ::testing::Ge; +using ::testing::NotNull; +using ::testing::Return; +... + EXPECT_CALL(foo, DoThis(Ge(5))) // The argument must be >= 5. + .WillOnce(Return('a')); + EXPECT_CALL(foo, DoThat("Hello", NotNull())); + // The second argument must not be NULL. +``` + +A frequently used matcher is `_`, which matches anything: + +``` +using ::testing::_; +using ::testing::NotNull; +... + EXPECT_CALL(foo, DoThat(_, NotNull())); +``` + +## Combining Matchers ## + +You can build complex matchers from existing ones using `AllOf()`, +`AnyOf()`, and `Not()`: + +``` +using ::testing::AllOf; +using ::testing::Gt; +using ::testing::HasSubstr; +using ::testing::Ne; +using ::testing::Not; +... + // The argument must be > 5 and != 10. + EXPECT_CALL(foo, DoThis(AllOf(Gt(5), + Ne(10)))); + + // The first argument must not contain sub-string "blah". 
+ EXPECT_CALL(foo, DoThat(Not(HasSubstr("blah")), + NULL)); +``` + +## Casting Matchers ## + +Google Mock matchers are statically typed, meaning that the compiler +can catch your mistake if you use a matcher of the wrong type (for +example, if you use `Eq(5)` to match a `string` argument). Good for +you! + +Sometimes, however, you know what you're doing and want the compiler +to give you some slack. One example is that you have a matcher for +`long` and the argument you want to match is `int`. While the two +types aren't exactly the same, there is nothing really wrong with +using a `Matcher<long>` to match an `int` - after all, we can first +convert the `int` argument to a `long` before giving it to the +matcher. + +To support this need, Google Mock gives you the +`SafeMatcherCast<T>(m)` function. It casts a matcher `m` to type +`Matcher<T>`. To ensure safety, Google Mock checks that (let `U` be the +type `m` accepts): + + 1. Type `T` can be implicitly cast to type `U`; + 1. When both `T` and `U` are built-in arithmetic types (`bool`, integers, and floating-point numbers), the conversion from `T` to `U` is not lossy (in other words, any value representable by `T` can also be represented by `U`); and + 1. When `U` is a reference, `T` must also be a reference (as the underlying matcher may be interested in the address of the `U` value). + +The code won't compile if any of these conditions isn't met. + +Here's one example: + +``` +using ::testing::SafeMatcherCast; + +// A base class and a child class. +class Base { ... }; +class Derived : public Base { ... }; + +class MockFoo : public Foo { + public: + MOCK_METHOD1(DoThis, void(Derived* derived)); +}; +... + + MockFoo foo; + // m is a Matcher<Base*> we got from somewhere. + EXPECT_CALL(foo, DoThis(SafeMatcherCast<Derived*>(m))); +``` + +If you find `SafeMatcherCast<T>(m)` too limiting, you can use a similar +function `MatcherCast<T>(m)`. The difference is that `MatcherCast` works +as long as you can `static_cast` type `T` to type `U`.
+ +`MatcherCast` essentially lets you bypass C++'s type system +(`static_cast` isn't always safe as it could throw away information, +for example), so be careful not to misuse/abuse it. + +## Selecting Between Overloaded Functions ## + +If you expect an overloaded function to be called, the compiler may +need some help on which overloaded version it is. + +To disambiguate functions overloaded on the const-ness of this object, +use the `Const()` argument wrapper. + +``` +using ::testing::ReturnRef; + +class MockFoo : public Foo { + ... + MOCK_METHOD0(GetBar, Bar&()); + MOCK_CONST_METHOD0(GetBar, const Bar&()); +}; +... + + MockFoo foo; + Bar bar1, bar2; + EXPECT_CALL(foo, GetBar()) // The non-const GetBar(). + .WillOnce(ReturnRef(bar1)); + EXPECT_CALL(Const(foo), GetBar()) // The const GetBar(). + .WillOnce(ReturnRef(bar2)); +``` + +(`Const()` is defined by Google Mock and returns a `const` reference +to its argument.) + +To disambiguate overloaded functions with the same number of arguments +but different argument types, you may need to specify the exact type +of a matcher, either by wrapping your matcher in `Matcher<type>()`, or +using a matcher whose type is fixed (`TypedEq<type>`, `An<type>()`, +etc): + +``` +using ::testing::An; +using ::testing::Lt; +using ::testing::Matcher; +using ::testing::TypedEq; + +class MockPrinter : public Printer { + public: + MOCK_METHOD1(Print, void(int n)); + MOCK_METHOD1(Print, void(char c)); +}; + +TEST(PrinterTest, Print) { + MockPrinter printer; + + EXPECT_CALL(printer, Print(An<int>())); // void Print(int); + EXPECT_CALL(printer, Print(Matcher<int>(Lt(5)))); // void Print(int); + EXPECT_CALL(printer, Print(TypedEq<char>('a'))); // void Print(char); + + printer.Print(3); + printer.Print(6); + printer.Print('a'); +} +``` + +## Performing Different Actions Based on the Arguments ## + +When a mock method is called, the _last_ matching expectation that's +still active will be selected (think "newer overrides older").
So, you +can make a method do different things depending on its argument values +like this: + +``` +using ::testing::_; +using ::testing::Lt; +using ::testing::Return; +... + // The default case. + EXPECT_CALL(foo, DoThis(_)) + .WillRepeatedly(Return('b')); + + // The more specific case. + EXPECT_CALL(foo, DoThis(Lt(5))) + .WillRepeatedly(Return('a')); +``` + +Now, if `foo.DoThis()` is called with a value less than 5, `'a'` will +be returned; otherwise `'b'` will be returned. + +## Matching Multiple Arguments as a Whole ## + +Sometimes it's not enough to match the arguments individually. For +example, we may want to say that the first argument must be less than +the second argument. The `With()` clause allows us to match +all arguments of a mock function as a whole. For example, + +``` +using ::testing::_; +using ::testing::Lt; +using ::testing::Ne; +... + EXPECT_CALL(foo, InRange(Ne(0), _)) + .With(Lt()); +``` + +says that the first argument of `InRange()` must not be 0, and must be +less than the second argument. + +The expression inside `With()` must be a matcher of type +`Matcher< ::testing::tuple<A1, ..., An> >`, where `A1`, ..., `An` are the +types of the function arguments. + +You can also write `AllArgs(m)` instead of `m` inside `.With()`. The +two forms are equivalent, but `.With(AllArgs(Lt()))` is more readable +than `.With(Lt())`. + +You can use `Args<k1, ..., kn>(m)` to match the `n` selected arguments +(as a tuple) against `m`. For example, + +``` +using ::testing::_; +using ::testing::AllOf; +using ::testing::Args; +using ::testing::Lt; +... + EXPECT_CALL(foo, Blah(_, _, _)) + .With(AllOf(Args<0, 1>(Lt()), Args<1, 2>(Lt()))); +``` + +says that `Blah()` will be called with arguments `x`, `y`, and `z` where +`x < y < z`. + +As a convenience and example, Google Mock provides some matchers for +2-tuples, including the `Lt()` matcher above. See the [CheatSheet](CheatSheet.md) for +the complete list. + +Note that if you want to pass the arguments to a predicate of your own +(e.g.
`.With(Args<0, 1>(Truly(&MyPredicate)))`), that predicate MUST be +written to take a `::testing::tuple` as its argument; Google Mock will pass the `n` selected arguments as _one_ single tuple to the predicate. + +## Using Matchers as Predicates ## + +Have you noticed that a matcher is just a fancy predicate that also +knows how to describe itself? Many existing algorithms take predicates +as arguments (e.g. those defined in STL's `<algorithm>` header), and +it would be a shame if Google Mock matchers were not allowed to +participate. + +Luckily, you can use a matcher where a unary predicate functor is +expected by wrapping it inside the `Matches()` function. For example, + +``` +#include <algorithm> +#include <vector> + +std::vector<int> v; +... +// How many elements in v are >= 10? +const int count = count_if(v.begin(), v.end(), Matches(Ge(10))); +``` + +Since you can build complex matchers from simpler ones easily using +Google Mock, this gives you a way to conveniently construct composite +predicates (doing the same using STL's `<functional>` header is just +painful). For example, here's a predicate that's satisfied by any +number that is >= 0, <= 100, and != 50: + +``` +Matches(AllOf(Ge(0), Le(100), Ne(50))) +``` + +## Using Matchers in Google Test Assertions ## + +Since matchers are basically predicates that also know how to describe +themselves, there is a way to take advantage of them in +[Google Test](../../googletest/) assertions. It's +called `ASSERT_THAT` and `EXPECT_THAT`: + +``` + ASSERT_THAT(value, matcher); // Asserts that value matches matcher. + EXPECT_THAT(value, matcher); // The non-fatal version. +``` + +For example, in a Google Test test you can write: + +``` +#include "gmock/gmock.h" + +using ::testing::AllOf; +using ::testing::Ge; +using ::testing::Le; +using ::testing::MatchesRegex; +using ::testing::StartsWith; +...
+ + EXPECT_THAT(Foo(), StartsWith("Hello")); + EXPECT_THAT(Bar(), MatchesRegex("Line \\d+")); + ASSERT_THAT(Baz(), AllOf(Ge(5), Le(10))); +``` + +which (as you can probably guess) executes `Foo()`, `Bar()`, and +`Baz()`, and verifies that: + + * `Foo()` returns a string that starts with `"Hello"`. + * `Bar()` returns a string that matches regular expression `"Line \\d+"`. + * `Baz()` returns a number in the range [5, 10]. + +The nice thing about these macros is that _they read like +English_. They generate informative messages too. For example, if the +first `EXPECT_THAT()` above fails, the message will be something like: + +``` +Value of: Foo() + Actual: "Hi, world!" +Expected: starts with "Hello" +``` + +**Credit:** The idea of `(ASSERT|EXPECT)_THAT` was stolen from the +[Hamcrest](https://github.com/hamcrest/) project, which adds +`assertThat()` to JUnit. + +## Using Predicates as Matchers ## + +Google Mock provides a built-in set of matchers. In case you find them +lacking, you can use an arbitrary unary predicate function or functor +as a matcher - as long as the predicate accepts a value of the type +you want. You do this by wrapping the predicate inside the `Truly()` +function, for example: + +``` +using ::testing::Truly; + +int IsEven(int n) { return (n % 2) == 0 ? 1 : 0; } +... + + // Bar() must be called with an even number. + EXPECT_CALL(foo, Bar(Truly(IsEven))); +``` + +Note that the predicate function / functor doesn't have to return +`bool`. It works as long as the return value can be used as the +condition in statement `if (condition) ...`. + +## Matching Arguments that Are Not Copyable ## + +When you do an `EXPECT_CALL(mock_obj, Foo(bar))`, Google Mock saves +away a copy of `bar`. When `Foo()` is called later, Google Mock +compares the argument to `Foo()` with the saved copy of `bar`. This +way, you don't need to worry about `bar` being modified or destroyed +after the `EXPECT_CALL()` is executed.
The same is true when you use +matchers like `Eq(bar)`, `Le(bar)`, and so on. + +But what if `bar` cannot be copied (i.e. has no copy constructor)? You +could define your own matcher function and use it with `Truly()`, as +the previous couple of recipes have shown. Or, you may be able to get +away from it if you can guarantee that `bar` won't be changed after +the `EXPECT_CALL()` is executed. Just tell Google Mock that it should +save a reference to `bar`, instead of a copy of it. Here's how: + +``` +using ::testing::Eq; +using ::testing::ByRef; +using ::testing::Lt; +... + // Expects that Foo()'s argument == bar. + EXPECT_CALL(mock_obj, Foo(Eq(ByRef(bar)))); + + // Expects that Foo()'s argument < bar. + EXPECT_CALL(mock_obj, Foo(Lt(ByRef(bar)))); +``` + +Remember: if you do this, don't change `bar` after the +`EXPECT_CALL()`, or the result is undefined. + +## Validating a Member of an Object ## + +Often a mock function takes a reference to object as an argument. When +matching the argument, you may not want to compare the entire object +against a fixed object, as that may be over-specification. Instead, +you may need to validate a certain member variable or the result of a +certain getter method of the object. You can do this with `Field()` +and `Property()`. More specifically, + +``` +Field(&Foo::bar, m) +``` + +is a matcher that matches a `Foo` object whose `bar` member variable +satisfies matcher `m`. + +``` +Property(&Foo::baz, m) +``` + +is a matcher that matches a `Foo` object whose `baz()` method returns +a value that satisfies matcher `m`. + +For example: + +| Expression | Description | +|:-----------------------------|:-----------------------------------| +| `Field(&Foo::number, Ge(3))` | Matches `x` where `x.number >= 3`. | +| `Property(&Foo::name, StartsWith("John "))` | Matches `x` where `x.name()` starts with `"John "`. | + +Note that in `Property(&Foo::baz, ...)`, method `baz()` must take no +argument and be declared as `const`. 
+ +BTW, `Field()` and `Property()` can also match plain pointers to +objects. For instance, + +``` +Field(&Foo::number, Ge(3)) +``` + +matches a plain pointer `p` where `p->number >= 3`. If `p` is `NULL`, +the match will always fail regardless of the inner matcher. + +What if you want to validate more than one members at the same time? +Remember that there is `AllOf()`. + +## Validating the Value Pointed to by a Pointer Argument ## + +C++ functions often take pointers as arguments. You can use matchers +like `IsNull()`, `NotNull()`, and other comparison matchers to match a +pointer, but what if you want to make sure the value _pointed to_ by +the pointer, instead of the pointer itself, has a certain property? +Well, you can use the `Pointee(m)` matcher. + +`Pointee(m)` matches a pointer iff `m` matches the value the pointer +points to. For example: + +``` +using ::testing::Ge; +using ::testing::Pointee; +... + EXPECT_CALL(foo, Bar(Pointee(Ge(3)))); +``` + +expects `foo.Bar()` to be called with a pointer that points to a value +greater than or equal to 3. + +One nice thing about `Pointee()` is that it treats a `NULL` pointer as +a match failure, so you can write `Pointee(m)` instead of + +``` + AllOf(NotNull(), Pointee(m)) +``` + +without worrying that a `NULL` pointer will crash your test. + +Also, did we tell you that `Pointee()` works with both raw pointers +**and** smart pointers (`linked_ptr`, `shared_ptr`, `scoped_ptr`, and +etc)? + +What if you have a pointer to pointer? You guessed it - you can use +nested `Pointee()` to probe deeper inside the value. For example, +`Pointee(Pointee(Lt(3)))` matches a pointer that points to a pointer +that points to a number less than 3 (what a mouthful...). + +## Testing a Certain Property of an Object ## + +Sometimes you want to specify that an object argument has a certain +property, but there is no existing matcher that does this. If you want +good error messages, you should define a matcher. 
If you want to do it +quick and dirty, you could get away with writing an ordinary function. + +Let's say you have a mock function that takes an object of type `Foo`, +which has an `int bar()` method and an `int baz()` method, and you +want to constrain that the argument's `bar()` value plus its `baz()` +value is a given number. Here's how you can define a matcher to do it: + +``` +using ::testing::MatcherInterface; +using ::testing::MatchResultListener; + +class BarPlusBazEqMatcher : public MatcherInterface { + public: + explicit BarPlusBazEqMatcher(int expected_sum) + : expected_sum_(expected_sum) {} + + virtual bool MatchAndExplain(const Foo& foo, + MatchResultListener* listener) const { + return (foo.bar() + foo.baz()) == expected_sum_; + } + + virtual void DescribeTo(::std::ostream* os) const { + *os << "bar() + baz() equals " << expected_sum_; + } + + virtual void DescribeNegationTo(::std::ostream* os) const { + *os << "bar() + baz() does not equal " << expected_sum_; + } + private: + const int expected_sum_; +}; + +inline Matcher BarPlusBazEq(int expected_sum) { + return MakeMatcher(new BarPlusBazEqMatcher(expected_sum)); +} + +... + + EXPECT_CALL(..., DoThis(BarPlusBazEq(5)))...; +``` + +## Matching Containers ## + +Sometimes an STL container (e.g. list, vector, map, ...) is passed to +a mock function and you may want to validate it. Since most STL +containers support the `==` operator, you can write +`Eq(expected_container)` or simply `expected_container` to match a +container exactly. + +Sometimes, though, you may want to be more flexible (for example, the +first element must be an exact match, but the second element can be +any positive number, and so on). Also, containers used in tests often +have a small number of elements, and having to define the expected +container out-of-line is a bit of a hassle. 
+ +You can use the `ElementsAre()` or `UnorderedElementsAre()` matcher in +such cases: + +``` +using ::testing::_; +using ::testing::ElementsAre; +using ::testing::Gt; +... + + MOCK_METHOD1(Foo, void(const vector& numbers)); +... + + EXPECT_CALL(mock, Foo(ElementsAre(1, Gt(0), _, 5))); +``` + +The above matcher says that the container must have 4 elements, which +must be 1, greater than 0, anything, and 5 respectively. + +If you instead write: + +``` +using ::testing::_; +using ::testing::Gt; +using ::testing::UnorderedElementsAre; +... + + MOCK_METHOD1(Foo, void(const vector& numbers)); +... + + EXPECT_CALL(mock, Foo(UnorderedElementsAre(1, Gt(0), _, 5))); +``` + +It means that the container must have 4 elements, which under some +permutation must be 1, greater than 0, anything, and 5 respectively. + +`ElementsAre()` and `UnorderedElementsAre()` are overloaded to take 0 +to 10 arguments. If more are needed, you can place them in a C-style +array and use `ElementsAreArray()` or `UnorderedElementsAreArray()` +instead: + +``` +using ::testing::ElementsAreArray; +... + + // ElementsAreArray accepts an array of element values. + const int expected_vector1[] = { 1, 5, 2, 4, ... }; + EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector1))); + + // Or, an array of element matchers. + Matcher expected_vector2 = { 1, Gt(2), _, 3, ... }; + EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector2))); +``` + +In case the array needs to be dynamically created (and therefore the +array size cannot be inferred by the compiler), you can give +`ElementsAreArray()` an additional argument to specify the array size: + +``` +using ::testing::ElementsAreArray; +... + int* const expected_vector3 = new int[count]; + ... fill expected_vector3 with values ... + EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector3, count))); +``` + +**Tips:** + + * `ElementsAre*()` can be used to match _any_ container that implements the STL iterator pattern (i.e. 
it has a `const_iterator` type and supports `begin()/end()`), not just the ones defined in STL. It will even work with container types yet to be written - as long as they follow the above pattern. + * You can use nested `ElementsAre*()` to match nested (multi-dimensional) containers. + * If the container is passed by pointer instead of by reference, just write `Pointee(ElementsAre*(...))`. + * The order of elements _matters_ for `ElementsAre*()`. Therefore don't use it with containers whose element order is undefined (e.g. `hash_map`). + +## Sharing Matchers ## + +Under the hood, a Google Mock matcher object consists of a pointer to +a ref-counted implementation object. Copying matchers is allowed and +very efficient, as only the pointer is copied. When the last matcher +that references the implementation object dies, the implementation +object will be deleted. + +Therefore, if you have some complex matcher that you want to use again +and again, there is no need to build it every time. Just assign it to a +matcher variable and use that variable repeatedly! For example, + +``` + Matcher<int> in_range = AllOf(Gt(5), Le(10)); + ... use in_range as a matcher in multiple EXPECT_CALLs ... +``` + +# Setting Expectations # + +## Knowing When to Expect ## + +`ON_CALL` is likely the single most under-utilized construct in Google Mock. + +There are basically two constructs for defining the behavior of a mock object: `ON_CALL` and `EXPECT_CALL`. The difference? `ON_CALL` defines what happens when a mock method is called, but _doesn't imply any expectation on the method being called._ `EXPECT_CALL` not only defines the behavior, but also sets an expectation that _the method will be called with the given arguments, for the given number of times_ (and _in the given order_ when you specify the order too). + +Since `EXPECT_CALL` does more, isn't it better than `ON_CALL`? Not really. Every `EXPECT_CALL` adds a constraint on the behavior of the code under test.
Having more constraints than necessary is _baaad_ - even worse than not having enough constraints. + +This may be counter-intuitive. How could tests that verify more be worse than tests that verify less? Isn't verification the whole point of tests? + +The answer lies in _what_ a test should verify. **A good test verifies the contract of the code.** If a test over-specifies, it doesn't leave enough freedom to the implementation. As a result, changing the implementation without breaking the contract (e.g. refactoring and optimization), which should be perfectly fine to do, can break such tests. Then you have to spend time fixing them, only to see them broken again the next time the implementation is changed. + +Keep in mind that one doesn't have to verify more than one property in one test. In fact, **it's a good style to verify only one thing in one test.** If you do that, a bug will likely break only one or two tests instead of dozens (which case would you rather debug?). If you are also in the habit of giving tests descriptive names that tell what they verify, you can often easily guess what's wrong just from the test log itself. + +So use `ON_CALL` by default, and only use `EXPECT_CALL` when you actually intend to verify that the call is made. For example, you may have a bunch of `ON_CALL`s in your test fixture to set the common mock behavior shared by all tests in the same group, and write (sparingly) different `EXPECT_CALL`s in different `TEST_F`s to verify different aspects of the code's behavior. Compared with the style where each `TEST` has many `EXPECT_CALL`s, this leads to tests that are more resilient to implementation changes (and thus less likely to require maintenance) and makes the intent of the tests more obvious (so they are easier to maintain when you do need to maintain them).
+ +If you are bothered by the "Uninteresting mock function call" message printed when a mock method without an `EXPECT_CALL` is called, you may use a `NiceMock` instead to suppress all such messages for the mock object, or suppress the message for specific methods by adding `EXPECT_CALL(...).Times(AnyNumber())`. DO NOT suppress it by blindly adding an `EXPECT_CALL(...)`, or you'll have a test that's a pain to maintain. + +## Ignoring Uninteresting Calls ## + +If you are not interested in how a mock method is called, just don't +say anything about it. In this case, if the method is ever called, +Google Mock will perform its default action to allow the test program +to continue. If you are not happy with the default action taken by +Google Mock, you can override it using `DefaultValue::Set()` +(described later in this document) or `ON_CALL()`. + +Please note that once you expressed interest in a particular mock +method (via `EXPECT_CALL()`), all invocations to it must match some +expectation. If this function is called but the arguments don't match +any `EXPECT_CALL()` statement, it will be an error. + +## Disallowing Unexpected Calls ## + +If a mock method shouldn't be called at all, explicitly say so: + +``` +using ::testing::_; +... + EXPECT_CALL(foo, Bar(_)) + .Times(0); +``` + +If some calls to the method are allowed, but the rest are not, just +list all the expected calls: + +``` +using ::testing::AnyNumber; +using ::testing::Gt; +... + EXPECT_CALL(foo, Bar(5)); + EXPECT_CALL(foo, Bar(Gt(10))) + .Times(AnyNumber()); +``` + +A call to `foo.Bar()` that doesn't match any of the `EXPECT_CALL()` +statements will be an error. + +## Understanding Uninteresting vs Unexpected Calls ## + +_Uninteresting_ calls and _unexpected_ calls are different concepts in Google Mock. _Very_ different. + +A call `x.Y(...)` is **uninteresting** if there's _not even a single_ `EXPECT_CALL(x, Y(...))` set. 
In other words, the test isn't interested in the `x.Y()` method at all, as evident in that the test doesn't care to say anything about it. + +A call `x.Y(...)` is **unexpected** if there are some `EXPECT_CALL(x, Y(...))s` set, but none of them matches the call. Put another way, the test is interested in the `x.Y()` method (therefore it _explicitly_ sets some `EXPECT_CALL` to verify how it's called); however, the verification fails as the test doesn't expect this particular call to happen. + +**An unexpected call is always an error,** as the code under test doesn't behave the way the test expects it to behave. + +**By default, an uninteresting call is not an error,** as it violates no constraint specified by the test. (Google Mock's philosophy is that saying nothing means there is no constraint.) However, it leads to a warning, as it _might_ indicate a problem (e.g. the test author might have forgotten to specify a constraint). + +In Google Mock, `NiceMock` and `StrictMock` can be used to make a mock class "nice" or "strict". How does this affect uninteresting calls and unexpected calls? + +A **nice mock** suppresses uninteresting call warnings. It is less chatty than the default mock, but otherwise is the same. If a test fails with a default mock, it will also fail using a nice mock instead. And vice versa. Don't expect making a mock nice to change the test's result. + +A **strict mock** turns uninteresting call warnings into errors. So making a mock strict may change the test's result. + +Let's look at an example: + +``` +TEST(...) { + NiceMock mock_registry; + EXPECT_CALL(mock_registry, GetDomainOwner("google.com")) + .WillRepeatedly(Return("Larry Page")); + + // Use mock_registry in code under test. + ... &mock_registry ... +} +``` + +The sole `EXPECT_CALL` here says that all calls to `GetDomainOwner()` must have `"google.com"` as the argument. If `GetDomainOwner("yahoo.com")` is called, it will be an unexpected call, and thus an error. 
Having a nice mock doesn't change the severity of an unexpected call. + +So how do we tell Google Mock that `GetDomainOwner()` can be called with some other arguments as well? The standard technique is to add a "catch all" `EXPECT_CALL`: + +``` + EXPECT_CALL(mock_registry, GetDomainOwner(_)) + .Times(AnyNumber()); // catches all other calls to this method. + EXPECT_CALL(mock_registry, GetDomainOwner("google.com")) + .WillRepeatedly(Return("Larry Page")); +``` + +Remember that `_` is the wildcard matcher that matches anything. With this, if `GetDomainOwner("google.com")` is called, it will do what the second `EXPECT_CALL` says; if it is called with a different argument, it will do what the first `EXPECT_CALL` says. + +Note that the order of the two `EXPECT_CALL`s is important, as a newer `EXPECT_CALL` takes precedence over an older one. + +For more on uninteresting calls, nice mocks, and strict mocks, read ["The Nice, the Strict, and the Naggy"](#the-nice-the-strict-and-the-naggy). + +## Expecting Ordered Calls ## + +Although an `EXPECT_CALL()` statement defined later takes precedence +when Google Mock tries to match a function call with an expectation, +by default calls don't have to happen in the order `EXPECT_CALL()` +statements are written. For example, if the arguments match the +matchers in the third `EXPECT_CALL()`, but not those in the first two, +then the third expectation will be used. + +If you would rather have all calls occur in the order of the +expectations, put the `EXPECT_CALL()` statements in a block where you +define a variable of type `InSequence`: + +``` + using ::testing::_; + using ::testing::InSequence; + + { + InSequence s; + + EXPECT_CALL(foo, DoThis(5)); + EXPECT_CALL(bar, DoThat(_)) + .Times(2); + EXPECT_CALL(foo, DoThis(6)); + } +``` + +In this example, we expect a call to `foo.DoThis(5)`, followed by two +calls to `bar.DoThat()` where the argument can be anything, which are +in turn followed by a call to `foo.DoThis(6)`.
If a call occurred +out-of-order, Google Mock will report an error. + +## Expecting Partially Ordered Calls ## + +Sometimes requiring everything to occur in a predetermined order can +lead to brittle tests. For example, we may care about `A` occurring +before both `B` and `C`, but aren't interested in the relative order +of `B` and `C`. In this case, the test should reflect our real intent, +instead of being overly constraining. + +Google Mock allows you to impose an arbitrary DAG (directed acyclic +graph) on the calls. One way to express the DAG is to use the +[After](CheatSheet.md#the-after-clause) clause of `EXPECT_CALL`. + +Another way is via the `InSequence()` clause (not the same as the +`InSequence` class), which we borrowed from jMock 2. It's less +flexible than `After()`, but more convenient when you have long chains +of sequential calls, as it doesn't require you to come up with +different names for the expectations in the chains. Here's how it +works: + +If we view `EXPECT_CALL()` statements as nodes in a graph, and add an +edge from node A to node B wherever A must occur before B, we can get +a DAG. We use the term "sequence" to mean a directed path in this +DAG. Now, if we decompose the DAG into sequences, we just need to know +which sequences each `EXPECT_CALL()` belongs to in order to be able to +reconstruct the original DAG. + +So, to specify the partial order on the expectations we need to do two +things: first to define some `Sequence` objects, and then for each +`EXPECT_CALL()` say which `Sequence` objects it is part +of. Expectations in the same sequence must occur in the order they are +written.
For example, + +``` + using ::testing::Sequence; + + Sequence s1, s2; + + EXPECT_CALL(foo, A()) + .InSequence(s1, s2); + EXPECT_CALL(bar, B()) + .InSequence(s1); + EXPECT_CALL(bar, C()) + .InSequence(s2); + EXPECT_CALL(foo, D()) + .InSequence(s2); +``` + +specifies the following DAG (where `s1` is `A -> B`, and `s2` is `A -> +C -> D`): + +``` + +---> B + | + A ---| + | + +---> C ---> D +``` + +This means that A must occur before B and C, and C must occur before +D. There's no restriction about the order other than these. + +## Controlling When an Expectation Retires ## + +When a mock method is called, Google Mock only considers expectations +that are still active. An expectation is active when created, and +becomes inactive (aka _retires_) when a call that has to occur later +has occurred. For example, in + +``` + using ::testing::_; + using ::testing::Sequence; + + Sequence s1, s2; + + EXPECT_CALL(log, Log(WARNING, _, "File too large.")) // #1 + .Times(AnyNumber()) + .InSequence(s1, s2); + EXPECT_CALL(log, Log(WARNING, _, "Data set is empty.")) // #2 + .InSequence(s1); + EXPECT_CALL(log, Log(WARNING, _, "User not found.")) // #3 + .InSequence(s2); +``` + +as soon as either #2 or #3 is matched, #1 will retire. If a warning +`"File too large."` is logged after this, it will be an error. + +Note that an expectation doesn't retire automatically when it's +saturated. For example, + +``` +using ::testing::_; +... + EXPECT_CALL(log, Log(WARNING, _, _)); // #1 + EXPECT_CALL(log, Log(WARNING, _, "File too large.")); // #2 +``` + +says that there will be exactly one warning with the message `"File +too large."`. If the second warning contains this message too, #2 will +match again and result in an upper-bound-violated error. + +If this is not what you want, you can ask an expectation to retire as +soon as it becomes saturated: + +``` +using ::testing::_; +...
+ EXPECT_CALL(log, Log(WARNING, _, _)); // #1 + EXPECT_CALL(log, Log(WARNING, _, "File too large.")) // #2 + .RetiresOnSaturation(); +``` + +Here #2 can be used only once, so if you have two warnings with the +message `"File too large."`, the first will match #2 and the second +will match #1 - there will be no error. + +# Using Actions # + +## Returning References from Mock Methods ## + +If a mock function's return type is a reference, you need to use +`ReturnRef()` instead of `Return()` to return a result: + +``` +using ::testing::ReturnRef; + +class MockFoo : public Foo { + public: + MOCK_METHOD0(GetBar, Bar&()); +}; +... + + MockFoo foo; + Bar bar; + EXPECT_CALL(foo, GetBar()) + .WillOnce(ReturnRef(bar)); +``` + +## Returning Live Values from Mock Methods ## + +The `Return(x)` action saves a copy of `x` when the action is +_created_, and always returns the same value whenever it's +executed. Sometimes you may want to instead return the _live_ value of +`x` (i.e. its value at the time when the action is _executed_.). + +If the mock function's return type is a reference, you can do it using +`ReturnRef(x)`, as shown in the previous recipe ("Returning References +from Mock Methods"). However, Google Mock doesn't let you use +`ReturnRef()` in a mock function whose return type is not a reference, +as doing that usually indicates a user error. So, what shall you do? + +You may be tempted to try `ByRef()`: + +``` +using testing::ByRef; +using testing::Return; + +class MockFoo : public Foo { + public: + MOCK_METHOD0(GetValue, int()); +}; +... + int x = 0; + MockFoo foo; + EXPECT_CALL(foo, GetValue()) + .WillRepeatedly(Return(ByRef(x))); + x = 42; + EXPECT_EQ(42, foo.GetValue()); +``` + +Unfortunately, it doesn't work here. 
The above code will fail with error: + +``` +Value of: foo.GetValue() + Actual: 0 +Expected: 42 +``` + +The reason is that `Return(value)` converts `value` to the actual +return type of the mock function at the time when the action is +_created_, not when it is _executed_. (This behavior was chosen for +the action to be safe when `value` is a proxy object that references +some temporary objects.) As a result, `ByRef(x)` is converted to an +`int` value (instead of a `const int&`) when the expectation is set, +and `Return(ByRef(x))` will always return 0. + +`ReturnPointee(pointer)` was provided to solve this problem +specifically. It returns the value pointed to by `pointer` at the time +the action is _executed_: + +``` +using testing::ReturnPointee; +... + int x = 0; + MockFoo foo; + EXPECT_CALL(foo, GetValue()) + .WillRepeatedly(ReturnPointee(&x)); // Note the & here. + x = 42; + EXPECT_EQ(42, foo.GetValue()); // This will succeed now. +``` + +## Combining Actions ## + +Want to do more than one thing when a function is called? That's +fine. `DoAll()` allows you to do a sequence of actions every time. Only +the return value of the last action in the sequence will be used. + +``` +using ::testing::DoAll; + +class MockFoo : public Foo { + public: + MOCK_METHOD1(Bar, bool(int n)); +}; +... + + EXPECT_CALL(foo, Bar(_)) + .WillOnce(DoAll(action_1, + action_2, + ... + action_n)); +``` + +## Mocking Side Effects ## + +Sometimes a method exhibits its effect not via returning a value but +via side effects. For example, it may change some global state or +modify an output argument. To mock side effects, in general you can +define your own action by implementing `::testing::ActionInterface`. + +If all you need to do is to change an output argument, the built-in +`SetArgPointee()` action is convenient: + +``` +using ::testing::SetArgPointee; + +class MockMutator : public Mutator { + public: + MOCK_METHOD2(Mutate, void(bool mutate, int* value)); + ... +}; +...
+ + MockMutator mutator; + EXPECT_CALL(mutator, Mutate(true, _)) + .WillOnce(SetArgPointee<1>(5)); +``` + +In this example, when `mutator.Mutate()` is called, we will assign 5 +to the `int` variable pointed to by argument #1 +(0-based). + +`SetArgPointee()` conveniently makes an internal copy of the +value you pass to it, removing the need to keep the value in scope and +alive. The implication however is that the value must have a copy +constructor and assignment operator. + +If the mock method also needs to return a value, you can chain +`SetArgPointee()` with `Return()` using `DoAll()`: + +``` +using ::testing::_; +using ::testing::Return; +using ::testing::SetArgPointee; + +class MockMutator : public Mutator { + public: + ... + MOCK_METHOD1(MutateInt, bool(int* value)); +}; +... + + MockMutator mutator; + EXPECT_CALL(mutator, MutateInt(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), + Return(true))); +``` + +If the output argument is an array, use the +`SetArrayArgument<N>(first, last)` action instead. It copies the +elements in source range `[first, last)` to the array pointed to by +the `N`-th (0-based) argument: + +``` +using ::testing::NotNull; +using ::testing::SetArrayArgument; + +class MockArrayMutator : public ArrayMutator { + public: + MOCK_METHOD2(Mutate, void(int* values, int num_values)); + ... +}; +... + + MockArrayMutator mutator; + int values[5] = { 1, 2, 3, 4, 5 }; + EXPECT_CALL(mutator, Mutate(NotNull(), 5)) + .WillOnce(SetArrayArgument<0>(values, values + 5)); +``` + +This also works when the argument is an output iterator: + +``` +using ::testing::_; +using ::testing::SetArrayArgument; + +class MockRolodex : public Rolodex { + public: + MOCK_METHOD1(GetNames, void(std::back_insert_iterator<vector<string> >)); + ... +}; +...
+ + MockRolodex rolodex; + vector names; + names.push_back("George"); + names.push_back("John"); + names.push_back("Thomas"); + EXPECT_CALL(rolodex, GetNames(_)) + .WillOnce(SetArrayArgument<0>(names.begin(), names.end())); +``` + +## Changing a Mock Object's Behavior Based on the State ## + +If you expect a call to change the behavior of a mock object, you can use `::testing::InSequence` to specify different behaviors before and after the call: + +``` +using ::testing::InSequence; +using ::testing::Return; + +... + { + InSequence seq; + EXPECT_CALL(my_mock, IsDirty()) + .WillRepeatedly(Return(true)); + EXPECT_CALL(my_mock, Flush()); + EXPECT_CALL(my_mock, IsDirty()) + .WillRepeatedly(Return(false)); + } + my_mock.FlushIfDirty(); +``` + +This makes `my_mock.IsDirty()` return `true` before `my_mock.Flush()` is called and return `false` afterwards. + +If the behavior change is more complex, you can store the effects in a variable and make a mock method get its return value from that variable: + +``` +using ::testing::_; +using ::testing::SaveArg; +using ::testing::Return; + +ACTION_P(ReturnPointee, p) { return *p; } +... + int previous_value = 0; + EXPECT_CALL(my_mock, GetPrevValue()) + .WillRepeatedly(ReturnPointee(&previous_value)); + EXPECT_CALL(my_mock, UpdateValue(_)) + .WillRepeatedly(SaveArg<0>(&previous_value)); + my_mock.DoSomethingToUpdateValue(); +``` + +Here `my_mock.GetPrevValue()` will always return the argument of the last `UpdateValue()` call. + +## Setting the Default Value for a Return Type ## + +If a mock method's return type is a built-in C++ type or pointer, by +default it will return 0 when invoked. Also, in C++ 11 and above, a mock +method whose return type has a default constructor will return a default-constructed +value by default. You only need to specify an +action if this default value doesn't work for you. 
+ +Sometimes, you may want to change this default value, or you may want +to specify a default value for types Google Mock doesn't know +about. You can do this using the `::testing::DefaultValue` class +template: + +``` +class MockFoo : public Foo { + public: + MOCK_METHOD0(CalculateBar, Bar()); +}; +... + + Bar default_bar; + // Sets the default return value for type Bar. + DefaultValue<Bar>::Set(default_bar); + + MockFoo foo; + + // We don't need to specify an action here, as the default + // return value works for us. + EXPECT_CALL(foo, CalculateBar()); + + foo.CalculateBar(); // This should return default_bar. + + // Unsets the default return value. + DefaultValue<Bar>::Clear(); +``` + +Please note that changing the default value for a type can make your +tests hard to understand. We recommend that you use this feature +judiciously. For example, you may want to make sure the `Set()` and +`Clear()` calls are right next to the code that uses your mock. + +## Setting the Default Actions for a Mock Method ## + +You've learned how to change the default value of a given +type. However, this may be too coarse for your purpose: perhaps you +have two mock methods with the same return type and you want them to +have different behaviors. The `ON_CALL()` macro allows you to +customize your mock's behavior at the method level: + +``` +using ::testing::_; +using ::testing::AnyNumber; +using ::testing::Gt; +using ::testing::Return; +... + ON_CALL(foo, Sign(_)) + .WillByDefault(Return(-1)); + ON_CALL(foo, Sign(0)) + .WillByDefault(Return(0)); + ON_CALL(foo, Sign(Gt(0))) + .WillByDefault(Return(1)); + + EXPECT_CALL(foo, Sign(_)) + .Times(AnyNumber()); + + foo.Sign(5); // This should return 1. + foo.Sign(-9); // This should return -1. + foo.Sign(0); // This should return 0. +``` + +As you may have guessed, when there is more than one `ON_CALL()` +statement, the newer ones take precedence over the older ones.
In +other words, the **last** one that matches the function arguments will +be used. This matching order allows you to set up the common behavior +in a mock object's constructor or the test fixture's set-up phase and +specialize the mock's behavior later. + +## Using Functions/Methods/Functors as Actions ## + +If the built-in actions don't suit you, you can easily use an existing +function, method, or functor as an action: + +``` +using ::testing::_; +using ::testing::Invoke; + +class MockFoo : public Foo { + public: + MOCK_METHOD2(Sum, int(int x, int y)); + MOCK_METHOD1(ComplexJob, bool(int x)); +}; + +int CalculateSum(int x, int y) { return x + y; } + +class Helper { + public: + bool ComplexJob(int x); +}; +... + + MockFoo foo; + Helper helper; + EXPECT_CALL(foo, Sum(_, _)) + .WillOnce(Invoke(CalculateSum)); + EXPECT_CALL(foo, ComplexJob(_)) + .WillOnce(Invoke(&helper, &Helper::ComplexJob)); + + foo.Sum(5, 6); // Invokes CalculateSum(5, 6). + foo.ComplexJob(10); // Invokes helper.ComplexJob(10); +``` + +The only requirement is that the type of the function, etc must be +_compatible_ with the signature of the mock function, meaning that the +latter's arguments can be implicitly converted to the corresponding +arguments of the former, and the former's return type can be +implicitly converted to that of the latter. So, you can invoke +something whose type is _not_ exactly the same as the mock function, +as long as it's safe to do so - nice, huh? + +## Invoking a Function/Method/Functor Without Arguments ## + +`Invoke()` is very useful for doing actions that are more complex. It +passes the mock function's arguments to the function or functor being +invoked such that the callee has the full context of the call to work +with. If the invoked function is not interested in some or all of the +arguments, it can simply ignore them. + +Yet, a common pattern is that a test author wants to invoke a function +without the arguments of the mock function. 
`Invoke()` allows her to +do that using a wrapper function that throws away the arguments before +invoking an underlying nullary function. Needless to say, this can be +tedious and obscures the intent of the test. + +`InvokeWithoutArgs()` solves this problem. It's like `Invoke()` except +that it doesn't pass the mock function's arguments to the +callee. Here's an example: + +``` +using ::testing::_; +using ::testing::InvokeWithoutArgs; + +class MockFoo : public Foo { + public: + MOCK_METHOD1(ComplexJob, bool(int n)); +}; + +bool Job1() { ... } +... + + MockFoo foo; + EXPECT_CALL(foo, ComplexJob(_)) + .WillOnce(InvokeWithoutArgs(Job1)); + + foo.ComplexJob(10); // Invokes Job1(). +``` + +## Invoking an Argument of the Mock Function ## + +Sometimes a mock function will receive a function pointer or a functor +(in other words, a "callable") as an argument, e.g. + +``` +class MockFoo : public Foo { + public: + MOCK_METHOD2(DoThis, bool(int n, bool (*fp)(int))); +}; +``` + +and you may want to invoke this callable argument: + +``` +using ::testing::_; +... + MockFoo foo; + EXPECT_CALL(foo, DoThis(_, _)) + .WillOnce(...); + // Will execute (*fp)(5), where fp is the + // second argument DoThis() receives. +``` + +Arghh, you need to refer to a mock function argument but your version +of C++ has no lambdas, so you have to define your own action. :-( +Or do you really? + +Well, Google Mock has an action to solve _exactly_ this problem: + +``` + InvokeArgument<N>(arg_1, arg_2, ..., arg_m) +``` + +will invoke the `N`-th (0-based) argument the mock function receives, +with `arg_1`, `arg_2`, ..., and `arg_m`. No matter if the argument is +a function pointer or a functor, Google Mock handles them both. + +With that, you could write: + +``` +using ::testing::_; +using ::testing::InvokeArgument; +... + EXPECT_CALL(foo, DoThis(_, _)) + .WillOnce(InvokeArgument<1>(5)); + // Will execute (*fp)(5), where fp is the + // second argument DoThis() receives.
+``` + +What if the callable takes an argument by reference? No problem - just +wrap it inside `ByRef()`: + +``` +... + MOCK_METHOD1(Bar, bool(bool (*fp)(int, const Helper&))); +... +using ::testing::_; +using ::testing::ByRef; +using ::testing::InvokeArgument; +... + + MockFoo foo; + Helper helper; + ... + EXPECT_CALL(foo, Bar(_)) + .WillOnce(InvokeArgument<0>(5, ByRef(helper))); + // ByRef(helper) guarantees that a reference to helper, not a copy of it, + // will be passed to the callable. +``` + +What if the callable takes an argument by reference and we do **not** +wrap the argument in `ByRef()`? Then `InvokeArgument()` will _make a +copy_ of the argument, and pass a _reference to the copy_, instead of +a reference to the original value, to the callable. This is especially +handy when the argument is a temporary value: + +``` +... + MOCK_METHOD1(DoThat, bool(bool (*f)(const double& x, const string& s))); +... +using ::testing::_; +using ::testing::InvokeArgument; +... + + MockFoo foo; + ... + EXPECT_CALL(foo, DoThat(_)) + .WillOnce(InvokeArgument<0>(5.0, string("Hi"))); + // Will execute (*f)(5.0, string("Hi")), where f is the function pointer + // DoThat() receives. Note that the values 5.0 and string("Hi") are + // temporary and dead once the EXPECT_CALL() statement finishes. Yet + // it's fine to perform this action later, since a copy of the values + // are kept inside the InvokeArgument action. +``` + +## Ignoring an Action's Result ## + +Sometimes you have an action that returns _something_, but you need an +action that returns `void` (perhaps you want to use it in a mock +function that returns `void`, or perhaps it needs to be used in +`DoAll()` and it's not the last in the list). `IgnoreResult()` lets +you do that. 
For example: + +``` +using ::testing::_; +using ::testing::Invoke; +using ::testing::Return; + +int Process(const MyData& data); +string DoSomething(); + +class MockFoo : public Foo { + public: + MOCK_METHOD1(Abc, void(const MyData& data)); + MOCK_METHOD0(Xyz, bool()); +}; +... + + MockFoo foo; + EXPECT_CALL(foo, Abc(_)) + // .WillOnce(Invoke(Process)); + // The above line won't compile as Process() returns int but Abc() needs + // to return void. + .WillOnce(IgnoreResult(Invoke(Process))); + + EXPECT_CALL(foo, Xyz()) + .WillOnce(DoAll(IgnoreResult(Invoke(DoSomething)), + // Ignores the string DoSomething() returns. + Return(true))); +``` + +Note that you **cannot** use `IgnoreResult()` on an action that already +returns `void`. Doing so will lead to ugly compiler errors. + +## Selecting an Action's Arguments ## + +Say you have a mock function `Foo()` that takes seven arguments, and +you have a custom action that you want to invoke when `Foo()` is +called. Trouble is, the custom action only wants three arguments: + +``` +using ::testing::_; +using ::testing::Invoke; +... + MOCK_METHOD7(Foo, bool(bool visible, const string& name, int x, int y, + const map<pair<int, int>, double>& weight, + double min_weight, double max_weight)); +... + +bool IsVisibleInQuadrant1(bool visible, int x, int y) { + return visible && x >= 0 && y >= 0; +} +... + + EXPECT_CALL(mock, Foo(_, _, _, _, _, _, _)) + .WillOnce(Invoke(IsVisibleInQuadrant1)); // Uh, won't compile. :-( +``` + +To please the compiler God, you can define an "adaptor" that has +the same signature as `Foo()` and calls the custom action with the +right arguments: + +``` +using ::testing::_; +using ::testing::Invoke; + +bool MyIsVisibleInQuadrant1(bool visible, const string& name, int x, int y, + const map<pair<int, int>, double>& weight, + double min_weight, double max_weight) { + return IsVisibleInQuadrant1(visible, x, y); +} +... + + EXPECT_CALL(mock, Foo(_, _, _, _, _, _, _)) + .WillOnce(Invoke(MyIsVisibleInQuadrant1)); // Now it works.
+``` + +But isn't this awkward? + +Google Mock provides a generic _action adaptor_, so you can spend your +time minding more important business than writing your own +adaptors. Here's the syntax: + +``` + WithArgs<N1, N2, ..., Nk>(action) +``` + +creates an action that passes the arguments of the mock function at +the given indices (0-based) to the inner `action` and performs +it. Using `WithArgs`, our original example can be written as: + +``` +using ::testing::_; +using ::testing::Invoke; +using ::testing::WithArgs; +... + EXPECT_CALL(mock, Foo(_, _, _, _, _, _, _)) + .WillOnce(WithArgs<0, 2, 3>(Invoke(IsVisibleInQuadrant1))); + // No need to define your own adaptor. +``` + +For better readability, Google Mock also gives you: + + * `WithoutArgs(action)` when the inner `action` takes _no_ argument, and + * `WithArg<N>(action)` (no `s` after `Arg`) when the inner `action` takes _one_ argument. + +As you may have realized, `InvokeWithoutArgs(...)` is just syntactic +sugar for `WithoutArgs(Invoke(...))`. + +Here are more tips: + + * The inner action used in `WithArgs` and friends does not have to be `Invoke()` -- it can be anything. + * You can repeat an argument in the argument list if necessary, e.g. `WithArgs<2, 3, 3, 5>(...)`. + * You can change the order of the arguments, e.g. `WithArgs<3, 2, 1>(...)`. + * The types of the selected arguments do _not_ have to match the signature of the inner action exactly. It works as long as they can be implicitly converted to the corresponding arguments of the inner action. For example, if the 4-th argument of the mock function is an `int` and `my_action` takes a `double`, `WithArg<4>(my_action)` will work. + +## Ignoring Arguments in Action Functions ## + +The selecting-an-action's-arguments recipe showed us one way to make a +mock function and an action with incompatible argument lists fit +together. The downside is that wrapping the action in +`WithArgs<...>()` can get tedious for people writing the tests.
+ +If you are defining a function, method, or functor to be used with +`Invoke*()`, and you are not interested in some of its arguments, an +alternative to `WithArgs` is to declare the uninteresting arguments as +`Unused`. This makes the definition less cluttered and less fragile in +case the types of the uninteresting arguments change. It could also +increase the chance the action function can be reused. For example, +given + +``` + MOCK_METHOD3(Foo, double(const string& label, double x, double y)); + MOCK_METHOD3(Bar, double(int index, double x, double y)); +``` + +instead of + +``` +using ::testing::_; +using ::testing::Invoke; + +double DistanceToOriginWithLabel(const string& label, double x, double y) { + return sqrt(x*x + y*y); +} + +double DistanceToOriginWithIndex(int index, double x, double y) { + return sqrt(x*x + y*y); +} +... + + EXPECT_CALL(mock, Foo("abc", _, _)) + .WillOnce(Invoke(DistanceToOriginWithLabel)); + EXPECT_CALL(mock, Bar(5, _, _)) + .WillOnce(Invoke(DistanceToOriginWithIndex)); +``` + +you could write + +``` +using ::testing::_; +using ::testing::Invoke; +using ::testing::Unused; + +double DistanceToOrigin(Unused, double x, double y) { + return sqrt(x*x + y*y); +} +... + + EXPECT_CALL(mock, Foo("abc", _, _)) + .WillOnce(Invoke(DistanceToOrigin)); + EXPECT_CALL(mock, Bar(5, _, _)) + .WillOnce(Invoke(DistanceToOrigin)); +``` + +## Sharing Actions ## + +Just like matchers, a Google Mock action object consists of a pointer +to a ref-counted implementation object. Therefore copying actions is +also allowed and very efficient. When the last action that references +the implementation object dies, the implementation object will be +deleted. + +If you have some complex action that you want to use again and again, +you may not have to build it from scratch every time. If the action +doesn't have an internal state (i.e.
if it always does the same thing +no matter how many times it has been called), you can assign it to an +action variable and use that variable repeatedly. For example: + +``` + Action<bool(int*)> set_flag = DoAll(SetArgPointee<0>(5), + Return(true)); + ... use set_flag in .WillOnce() and .WillRepeatedly() ... +``` + +However, if the action has its own state, you may be surprised if you +share the action object. Suppose you have an action factory +`IncrementCounter(init)` which creates an action that increments and +returns a counter whose initial value is `init`. Using two actions +created from the same expression and using a shared action will +exhibit different behaviors. Example: + +``` + EXPECT_CALL(foo, DoThis()) + .WillRepeatedly(IncrementCounter(0)); + EXPECT_CALL(foo, DoThat()) + .WillRepeatedly(IncrementCounter(0)); + foo.DoThis(); // Returns 1. + foo.DoThis(); // Returns 2. + foo.DoThat(); // Returns 1 - DoThat() uses a different + // counter than DoThis()'s. +``` + +versus + +``` + Action<int()> increment = IncrementCounter(0); + + EXPECT_CALL(foo, DoThis()) + .WillRepeatedly(increment); + EXPECT_CALL(foo, DoThat()) + .WillRepeatedly(increment); + foo.DoThis(); // Returns 1. + foo.DoThis(); // Returns 2. + foo.DoThat(); // Returns 3 - the counter is shared. +``` + +# Misc Recipes on Using Google Mock # + +## Mocking Methods That Use Move-Only Types ## + +C++11 introduced move-only types. A move-only-typed value can be moved from one object to another, but cannot be copied. `std::unique_ptr` is probably the most commonly used move-only type. + +Mocking a method that takes and/or returns move-only types presents some challenges, but nothing insurmountable. This recipe shows you how you can do it. + +Let’s say we are working on a fictional project that lets one post and share snippets called “buzzes”. Your code uses these types: + +``` +enum class AccessLevel { kInternal, kPublic }; + +class Buzz { + public: + explicit Buzz(AccessLevel access) { … } + ...
+}; + +class Buzzer { + public: + virtual ~Buzzer() {} + virtual std::unique_ptr<Buzz> MakeBuzz(const std::string& text) = 0; + virtual bool ShareBuzz(std::unique_ptr<Buzz> buzz, Time timestamp) = 0; + ... +}; +``` + +A `Buzz` object represents a snippet being posted. A class that implements the `Buzzer` interface is capable of creating and sharing `Buzz`. Methods in `Buzzer` may return a `unique_ptr<Buzz>` or take a `unique_ptr<Buzz>`. Now we need to mock `Buzzer` in our tests. + +To mock a method that returns a move-only type, you just use the familiar `MOCK_METHOD` syntax as usual: + +``` +class MockBuzzer : public Buzzer { + public: + MOCK_METHOD1(MakeBuzz, std::unique_ptr<Buzz>(const std::string& text)); + … +}; +``` + +However, if you attempt to use the same `MOCK_METHOD` pattern to mock a method that takes a move-only parameter, you’ll get a compiler error currently: + +``` + // Does NOT compile! + MOCK_METHOD2(ShareBuzz, bool(std::unique_ptr<Buzz> buzz, Time timestamp)); +``` + +While it’s highly desirable to make this syntax just work, it’s not trivial and the work hasn’t been done yet. Fortunately, there is a trick you can apply today to get something that works nearly as well as this. + +The trick is to delegate the `ShareBuzz()` method to a mock method (let’s call it `DoShareBuzz()`) that does not take move-only parameters: + +``` +class MockBuzzer : public Buzzer { + public: + MOCK_METHOD1(MakeBuzz, std::unique_ptr<Buzz>(const std::string& text)); + MOCK_METHOD2(DoShareBuzz, bool(Buzz* buzz, Time timestamp)); + bool ShareBuzz(std::unique_ptr<Buzz> buzz, Time timestamp) { + return DoShareBuzz(buzz.get(), timestamp); + } +}; +``` + +Note that there's no need to define or declare `DoShareBuzz()` in a base class. You only need to define it as a `MOCK_METHOD` in the mock class. + +Now that we have the mock class defined, we can use it in tests.
In the following code examples, we assume that we have defined a `MockBuzzer` object named `mock_buzzer_`: + +``` + MockBuzzer mock_buzzer_; +``` + +First let’s see how we can set expectations on the `MakeBuzz()` method, which returns a `unique_ptr<Buzz>`. + +As usual, if you set an expectation without an action (i.e. the `.WillOnce()` or `.WillRepeatedly()` clause), when that expectation fires, the default action for that method will be taken. Since `unique_ptr<>` has a default constructor that returns a null `unique_ptr`, that’s what you’ll get if you don’t specify an action: + +``` + // Use the default action. + EXPECT_CALL(mock_buzzer_, MakeBuzz("hello")); + + // Triggers the previous EXPECT_CALL. + EXPECT_EQ(nullptr, mock_buzzer_.MakeBuzz("hello")); +``` + +If you are not happy with the default action, you can tweak it. Depending on what you need, you may either tweak the default action for a specific (mock object, mock method) combination using `ON_CALL()`, or you may tweak the default action for all mock methods that return a specific type. The usage of `ON_CALL()` is similar to `EXPECT_CALL()`, so we’ll skip it and just explain how to do the latter (tweaking the default action for a specific return type). You do this via the `DefaultValue<>::SetFactory()` and `DefaultValue<>::Clear()` API: + +``` + // Sets the default action for return type std::unique_ptr<Buzz> to + // creating a new Buzz every time. + DefaultValue<std::unique_ptr<Buzz>>::SetFactory( + [] { return MakeUnique<Buzz>(AccessLevel::kInternal); }); + + // When this fires, the default action of MakeBuzz() will run, which + // will return a new Buzz object. + EXPECT_CALL(mock_buzzer_, MakeBuzz("hello")).Times(AnyNumber()); + + auto buzz1 = mock_buzzer_.MakeBuzz("hello"); + auto buzz2 = mock_buzzer_.MakeBuzz("hello"); + EXPECT_NE(nullptr, buzz1); + EXPECT_NE(nullptr, buzz2); + EXPECT_NE(buzz1, buzz2); + + // Resets the default action for return type std::unique_ptr<Buzz>, + // to avoid interfering with other tests.
+ DefaultValue<std::unique_ptr<Buzz>>::Clear(); +``` + +What if you want the method to do something other than the default action? If you just need to return a pre-defined move-only value, you can use the `Return(ByMove(...))` action: + +``` + // When this fires, the unique_ptr<> specified by ByMove(...) will + // be returned. + EXPECT_CALL(mock_buzzer_, MakeBuzz("world")) + .WillOnce(Return(ByMove(MakeUnique<Buzz>(AccessLevel::kInternal)))); + + EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("world")); +``` + +Note that `ByMove()` is essential here - if you drop it, the code won’t compile. + +Quiz time! What do you think will happen if a `Return(ByMove(...))` action is performed more than once (e.g. you write `….WillRepeatedly(Return(ByMove(...)));`)? Come to think of it, after the first time the action runs, the source value will be consumed (since it’s a move-only value), so the next time around, there’s no value to move from -- you’ll get a run-time error that `Return(ByMove(...))` can only be run once. + +If you need your mock method to do more than just moving a pre-defined value, remember that you can always use `Invoke()` to call a lambda or a callable object, which can do pretty much anything you want: + +``` + EXPECT_CALL(mock_buzzer_, MakeBuzz("x")) + .WillRepeatedly(Invoke([](const std::string& text) { + return std::make_unique<Buzz>(AccessLevel::kInternal); + })); + + EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("x")); + EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("x")); +``` + +Every time this `EXPECT_CALL` fires, a new `unique_ptr<Buzz>` will be created and returned. You cannot do this with `Return(ByMove(...))`. + +Now there’s one topic we haven’t covered: how do you set expectations on `ShareBuzz()`, which takes a move-only-typed parameter? The answer is you don’t.
Instead, you set expectations on the `DoShareBuzz()` mock method (remember that we defined a `MOCK_METHOD` for `DoShareBuzz()`, not `ShareBuzz()`): + +``` + EXPECT_CALL(mock_buzzer_, DoShareBuzz(NotNull(), _)); + + // When one calls ShareBuzz() on the MockBuzzer like this, the call is + // forwarded to DoShareBuzz(), which is mocked. Therefore this statement + // will trigger the above EXPECT_CALL. + mock_buzzer_.ShareBuzz(MakeUnique(AccessLevel::kInternal), + ::base::Now()); +``` + +Some of you may have spotted one problem with this approach: the `DoShareBuzz()` mock method differs from the real `ShareBuzz()` method in that it cannot take ownership of the buzz parameter - `ShareBuzz()` will always delete buzz after `DoShareBuzz()` returns. What if you need to save the buzz object somewhere for later use when `ShareBuzz()` is called? Indeed, you'd be stuck. + +Another problem with the `DoShareBuzz()` we had is that it can surprise people reading or maintaining the test, as one would expect that `DoShareBuzz()` has (logically) the same contract as `ShareBuzz()`. + +Fortunately, these problems can be fixed with a bit more code. Let's try to get it right this time: + +``` +class MockBuzzer : public Buzzer { + public: + MockBuzzer() { + // Since DoShareBuzz(buzz, time) is supposed to take ownership of + // buzz, define a default behavior for DoShareBuzz(buzz, time) to + // delete buzz. + ON_CALL(*this, DoShareBuzz(_, _)) + .WillByDefault(Invoke([](Buzz* buzz, Time timestamp) { + delete buzz; + return true; + })); + } + + MOCK_METHOD1(MakeBuzz, std::unique_ptr(const std::string& text)); + + // Takes ownership of buzz. 
+ MOCK_METHOD2(DoShareBuzz, bool(Buzz* buzz, Time timestamp)); + bool ShareBuzz(std::unique_ptr buzz, Time timestamp) { + return DoShareBuzz(buzz.release(), timestamp); + } +}; +``` + +Now, the mock `DoShareBuzz()` method is free to save the buzz argument for later use if this is what you want: + +``` + std::unique_ptr intercepted_buzz; + EXPECT_CALL(mock_buzzer_, DoShareBuzz(NotNull(), _)) + .WillOnce(Invoke([&intercepted_buzz](Buzz* buzz, Time timestamp) { + // Save buzz in intercepted_buzz for analysis later. + intercepted_buzz.reset(buzz); + return false; + })); + + mock_buzzer_.ShareBuzz(std::make_unique(AccessLevel::kInternal), + Now()); + EXPECT_NE(nullptr, intercepted_buzz); +``` + +Using the tricks covered in this recipe, you are now able to mock methods that take and/or return move-only types. Put your newly-acquired power to good use - when you design a new API, you can now feel comfortable using `unique_ptrs` as appropriate, without fearing that doing so will compromise your tests. + +## Making the Compilation Faster ## + +Believe it or not, the _vast majority_ of the time spent on compiling +a mock class is in generating its constructor and destructor, as they +perform non-trivial tasks (e.g. verification of the +expectations). What's more, mock methods with different signatures +have different types and thus their constructors/destructors need to +be generated by the compiler separately. As a result, if you mock many +different types of methods, compiling your mock class can get really +slow. + +If you are experiencing slow compilation, you can move the definition +of your mock class' constructor and destructor out of the class body +and into a `.cpp` file. This way, even if you `#include` your mock +class in N files, the compiler only needs to generate its constructor +and destructor once, resulting in a much faster compilation. + +Let's illustrate the idea using an example. 
Here's the definition of a +mock class before applying this recipe: + +``` +// File mock_foo.h. +... +class MockFoo : public Foo { + public: + // Since we don't declare the constructor or the destructor, + // the compiler will generate them in every translation unit + // where this mock class is used. + + MOCK_METHOD0(DoThis, int()); + MOCK_METHOD1(DoThat, bool(const char* str)); + ... more mock methods ... +}; +``` + +After the change, it would look like: + +``` +// File mock_foo.h. +... +class MockFoo : public Foo { + public: + // The constructor and destructor are declared, but not defined, here. + MockFoo(); + virtual ~MockFoo(); + + MOCK_METHOD0(DoThis, int()); + MOCK_METHOD1(DoThat, bool(const char* str)); + ... more mock methods ... +}; +``` +and +``` +// File mock_foo.cpp. +#include "path/to/mock_foo.h" + +// The definitions may appear trivial, but the functions actually do a +// lot of things through the constructors/destructors of the member +// variables used to implement the mock methods. +MockFoo::MockFoo() {} +MockFoo::~MockFoo() {} +``` + +## Forcing a Verification ## + +When it's being destroyed, your friendly mock object will automatically +verify that all expectations on it have been satisfied, and will +generate [Google Test](../../googletest/) failures +if not. This is convenient as it leaves you with one less thing to +worry about. That is, unless you are not sure if your mock object will +be destroyed. + +How could it be that your mock object won't eventually be destroyed? +Well, it might be created on the heap and owned by the code you are +testing. Suppose there's a bug in that code and it doesn't delete the +mock object properly - you could end up with a passing test when +there's actually a bug. + +Using a heap checker is a good idea and can alleviate the concern, but +its implementation may not be 100% reliable. So, sometimes you do want +to _force_ Google Mock to verify a mock object before it is +(hopefully) destructed. 
You can do this with +`Mock::VerifyAndClearExpectations(&mock_object)`: + +``` +TEST(MyServerTest, ProcessesRequest) { + using ::testing::Mock; + + MockFoo* const foo = new MockFoo; + EXPECT_CALL(*foo, ...)...; + // ... other expectations ... + + // server now owns foo. + MyServer server(foo); + server.ProcessRequest(...); + + // In case server's destructor forgets to delete foo, + // this will verify the expectations anyway. + Mock::VerifyAndClearExpectations(foo); +} // server is destroyed when it goes out of scope here. +``` + +**Tip:** The `Mock::VerifyAndClearExpectations()` function returns a +`bool` to indicate whether the verification was successful (`true` for +yes), so you can wrap that function call inside an `ASSERT_TRUE()` if +there is no point going further when the verification has failed. + +## Using Check Points ## + +Sometimes you may want to "reset" a mock object at various check +points in your test: at each check point, you verify that all existing +expectations on the mock object have been satisfied, and then you set +some new expectations on it as if it's newly created. This allows you +to work with a mock object in "phases" whose sizes are each +manageable. + +One such scenario is that in your test's `SetUp()` function, you may +want to put the object you are testing into a certain state, with +help from a mock object. Once in the desired state, you want to clear +all expectations on the mock, such that in the `TEST_F` body you can +set fresh expectations on it. + +As you may have figured out, the `Mock::VerifyAndClearExpectations()` +function we saw in the previous recipe can help you here. Or, if you +are using `ON_CALL()` to set default actions on the mock object and +want to clear the default actions as well, use +`Mock::VerifyAndClear(&mock_object)` instead.
This function does what +`Mock::VerifyAndClearExpectations(&mock_object)` does and returns the +same `bool`, **plus** it clears the `ON_CALL()` statements on +`mock_object` too. + +Another trick you can use to achieve the same effect is to put the +expectations in sequences and insert calls to a dummy "check-point" +function at specific places. Then you can verify that the mock +function calls do happen at the right time. For example, if you are +exercising code: + +``` +Foo(1); +Foo(2); +Foo(3); +``` + +and want to verify that `Foo(1)` and `Foo(3)` both invoke +`mock.Bar("a")`, but `Foo(2)` doesn't invoke anything. You can write: + +``` +using ::testing::MockFunction; + +TEST(FooTest, InvokesBarCorrectly) { + MyMock mock; + // Class MockFunction has exactly one mock method. It is named + // Call() and has type F. + MockFunction check; + { + InSequence s; + + EXPECT_CALL(mock, Bar("a")); + EXPECT_CALL(check, Call("1")); + EXPECT_CALL(check, Call("2")); + EXPECT_CALL(mock, Bar("a")); + } + Foo(1); + check.Call("1"); + Foo(2); + check.Call("2"); + Foo(3); +} +``` + +The expectation spec says that the first `Bar("a")` must happen before +check point "1", the second `Bar("a")` must happen after check point "2", +and nothing should happen between the two check points. The explicit +check points make it easy to tell which `Bar("a")` is called by which +call to `Foo()`. + +## Mocking Destructors ## + +Sometimes you want to make sure a mock object is destructed at the +right time, e.g. after `bar->A()` is called but before `bar->B()` is +called. We already know that you can specify constraints on the order +of mock function calls, so all we need to do is to mock the destructor +of the mock function. + +This sounds simple, except for one problem: a destructor is a special +function with special syntax and special semantics, and the +`MOCK_METHOD0` macro doesn't work for it: + +``` + MOCK_METHOD0(~MockFoo, void()); // Won't compile! 
+``` + +The good news is that you can use a simple pattern to achieve the same +effect. First, add a mock function `Die()` to your mock class and call +it in the destructor, like this: + +``` +class MockFoo : public Foo { + ... + // Add the following two lines to the mock class. + MOCK_METHOD0(Die, void()); + virtual ~MockFoo() { Die(); } +}; +``` + +(If the name `Die()` clashes with an existing symbol, choose another +name.) Now, we have translated the problem of testing when a `MockFoo` +object dies to testing when its `Die()` method is called: + +``` + MockFoo* foo = new MockFoo; + MockBar* bar = new MockBar; + ... + { + InSequence s; + + // Expects *foo to die after bar->A() and before bar->B(). + EXPECT_CALL(*bar, A()); + EXPECT_CALL(*foo, Die()); + EXPECT_CALL(*bar, B()); + } +``` + +And that's that. + +## Using Google Mock and Threads ## + +**IMPORTANT NOTE:** What we describe in this recipe is **ONLY** true on +platforms where Google Mock is thread-safe. Currently these are only +platforms that support the pthreads library (this includes Linux and Mac). +To make it thread-safe on other platforms we only need to implement +some synchronization operations in `"gtest/internal/gtest-port.h"`. + +In a **unit** test, it's best if you could isolate and test a piece of +code in a single-threaded context. That avoids race conditions and +dead locks, and makes debugging your test much easier. + +Yet many programs are multi-threaded, and sometimes to test something +we need to pound on it from more than one thread. Google Mock works +for this purpose too. + +Remember the steps for using a mock: + + 1. Create a mock object `foo`. + 1. Set its default actions and expectations using `ON_CALL()` and `EXPECT_CALL()`. + 1. The code under test calls methods of `foo`. + 1. Optionally, verify and reset the mock. + 1. Destroy the mock yourself, or let the code under test destroy it. The destructor will automatically verify it. 
+ +If you follow the following simple rules, your mocks and threads can +live happily together: + + * Execute your _test code_ (as opposed to the code being tested) in _one_ thread. This makes your test easy to follow. + * Obviously, you can do step #1 without locking. + * When doing step #2 and #5, make sure no other thread is accessing `foo`. Obvious too, huh? + * #3 and #4 can be done either in one thread or in multiple threads - anyway you want. Google Mock takes care of the locking, so you don't have to do any - unless required by your test logic. + +If you violate the rules (for example, if you set expectations on a +mock while another thread is calling its methods), you get undefined +behavior. That's not fun, so don't do it. + +Google Mock guarantees that the action for a mock function is done in +the same thread that called the mock function. For example, in + +``` + EXPECT_CALL(mock, Foo(1)) + .WillOnce(action1); + EXPECT_CALL(mock, Foo(2)) + .WillOnce(action2); +``` + +if `Foo(1)` is called in thread 1 and `Foo(2)` is called in thread 2, +Google Mock will execute `action1` in thread 1 and `action2` in thread +2. + +Google Mock does _not_ impose a sequence on actions performed in +different threads (doing so may create deadlocks as the actions may +need to cooperate). This means that the execution of `action1` and +`action2` in the above example _may_ interleave. If this is a problem, +you should add proper synchronization logic to `action1` and `action2` +to make the test thread-safe. + + +Also, remember that `DefaultValue` is a global resource that +potentially affects _all_ living mock objects in your +program. Naturally, you won't want to mess with it from multiple +threads or when there still are mocks in action. + +## Controlling How Much Information Google Mock Prints ## + +When Google Mock sees something that has the potential of being an +error (e.g. a mock function with no expectation is called, a.k.a. 
an +uninteresting call, which is allowed but perhaps you forgot to +explicitly ban the call), it prints some warning messages, including +the arguments of the function and the return value. Hopefully this +will remind you to take a look and see if there is indeed a problem. + +Sometimes you are confident that your tests are correct and may not +appreciate such friendly messages. Some other times, you are debugging +your tests or learning about the behavior of the code you are testing, +and wish you could observe every mock call that happens (including +argument values and the return value). Clearly, one size doesn't fit +all. + +You can control how much Google Mock tells you using the +`--gmock_verbose=LEVEL` command-line flag, where `LEVEL` is a string +with three possible values: + + * `info`: Google Mock will print all informational messages, warnings, and errors (most verbose). At this setting, Google Mock will also log any calls to the `ON_CALL/EXPECT_CALL` macros. + * `warning`: Google Mock will print both warnings and errors (less verbose). This is the default. + * `error`: Google Mock will print errors only (least verbose). + +Alternatively, you can adjust the value of that flag from within your +tests like so: + +``` + ::testing::FLAGS_gmock_verbose = "error"; +``` + +Now, judiciously use the right flag to enable Google Mock to serve you better! + +## Gaining Super Vision into Mock Calls ## + +You have a test using Google Mock. It fails: Google Mock tells you +that some expectations aren't satisfied. However, you aren't sure why: +Is there a typo somewhere in the matchers? Did you mess up the order +of the `EXPECT_CALL`s? Or is the code under test doing something +wrong? How can you find out the cause? + +Wouldn't it be nice if you had X-ray vision and could actually see the +trace of all `EXPECT_CALL`s and mock method calls as they are made? +For each call, would you like to see its actual argument values and +which `EXPECT_CALL` Google Mock thinks it matches?
+ +You can unlock this power by running your test with the +`--gmock_verbose=info` flag. For example, given the test program: + +``` +using testing::_; +using testing::HasSubstr; +using testing::Return; + +class MockFoo { + public: + MOCK_METHOD2(F, void(const string& x, const string& y)); +}; + +TEST(Foo, Bar) { + MockFoo mock; + EXPECT_CALL(mock, F(_, _)).WillRepeatedly(Return()); + EXPECT_CALL(mock, F("a", "b")); + EXPECT_CALL(mock, F("c", HasSubstr("d"))); + + mock.F("a", "good"); + mock.F("a", "b"); +} +``` + +if you run it with `--gmock_verbose=info`, you will see this output: + +``` +[ RUN ] Foo.Bar + +foo_test.cc:14: EXPECT_CALL(mock, F(_, _)) invoked +foo_test.cc:15: EXPECT_CALL(mock, F("a", "b")) invoked +foo_test.cc:16: EXPECT_CALL(mock, F("c", HasSubstr("d"))) invoked +foo_test.cc:14: Mock function call matches EXPECT_CALL(mock, F(_, _))... + Function call: F(@0x7fff7c8dad40"a", @0x7fff7c8dad10"good") +foo_test.cc:15: Mock function call matches EXPECT_CALL(mock, F("a", "b"))... + Function call: F(@0x7fff7c8dada0"a", @0x7fff7c8dad70"b") +foo_test.cc:16: Failure +Actual function call count doesn't match EXPECT_CALL(mock, F("c", HasSubstr("d")))... + Expected: to be called once + Actual: never called - unsatisfied and active +[ FAILED ] Foo.Bar +``` + +Suppose the bug is that the `"c"` in the third `EXPECT_CALL` is a typo +and should actually be `"a"`. With the above message, you should see +that the actual `F("a", "good")` call is matched by the first +`EXPECT_CALL`, not the third as you thought. From that it should be +obvious that the third `EXPECT_CALL` is written wrong. Case solved. + +## Running Tests in Emacs ## + +If you build and run your tests in Emacs, the source file locations of +Google Mock and [Google Test](../../googletest/) +errors will be highlighted. Just press `<Enter>` on one of them and +you'll be taken to the offending line. Or, you can just type ``C-x ` `` +to jump to the next error.
+ +To make it even easier, you can add the following lines to your +`~/.emacs` file: + +``` +(global-set-key "\M-m" 'compile) ; m is for make +(global-set-key [M-down] 'next-error) +(global-set-key [M-up] '(lambda () (interactive) (next-error -1))) +``` + +Then you can type `M-m` to start a build, or `M-up`/`M-down` to move +back and forth between errors. + +## Fusing Google Mock Source Files ## + +Google Mock's implementation consists of dozens of files (excluding +its own tests). Sometimes you may want them to be packaged up in +fewer files instead, such that you can easily copy them to a new +machine and start hacking there. For this we provide an experimental +Python script `fuse_gmock_files.py` in the `scripts/` directory +(starting with release 1.2.0). Assuming you have Python 2.4 or above +installed on your machine, just go to that directory and run +``` +python fuse_gmock_files.py OUTPUT_DIR +``` + +and you should see an `OUTPUT_DIR` directory being created with files +`gtest/gtest.h`, `gmock/gmock.h`, and `gmock-gtest-all.cc` in it. +These three files contain everything you need to use Google Mock (and +Google Test). Just copy them to anywhere you want and you are ready +to write tests and use mocks. You can use the +[scripts/test/Makefile](../scripts/test/Makefile) file as an example of how to compile your tests +against them. + +# Extending Google Mock # + +## Writing New Matchers Quickly ## + +The `MATCHER*` family of macros can be used to define custom matchers +easily. The syntax: + +``` +MATCHER(name, description_string_expression) { statements; } +``` + +will define a matcher with the given name that executes the +statements, which must return a `bool` to indicate if the match +succeeds. Inside the statements, you can refer to the value being +matched by `arg`, and refer to its type by `arg_type`.
+ +The description string is a `string`-typed expression that documents +what the matcher does, and is used to generate the failure message +when the match fails. It can (and should) reference the special +`bool` variable `negation`, and should evaluate to the description of +the matcher when `negation` is `false`, or that of the matcher's +negation when `negation` is `true`. + +For convenience, we allow the description string to be empty (`""`), +in which case Google Mock will use the sequence of words in the +matcher name as the description. + +For example: +``` +MATCHER(IsDivisibleBy7, "") { return (arg % 7) == 0; } +``` +allows you to write +``` + // Expects mock_foo.Bar(n) to be called where n is divisible by 7. + EXPECT_CALL(mock_foo, Bar(IsDivisibleBy7())); +``` +or, +``` +using ::testing::Not; +... + EXPECT_THAT(some_expression, IsDivisibleBy7()); + EXPECT_THAT(some_other_expression, Not(IsDivisibleBy7())); +``` +If the above assertions fail, they will print something like: +``` + Value of: some_expression + Expected: is divisible by 7 + Actual: 27 +... + Value of: some_other_expression + Expected: not (is divisible by 7) + Actual: 21 +``` +where the descriptions `"is divisible by 7"` and `"not (is divisible +by 7)"` are automatically calculated from the matcher name +`IsDivisibleBy7`. + +As you may have noticed, the auto-generated descriptions (especially +those for the negation) may not be so great. You can always override +them with a string expression of your own: +``` +MATCHER(IsDivisibleBy7, std::string(negation ? "isn't" : "is") + + " divisible by 7") { + return (arg % 7) == 0; +} +``` + +Optionally, you can stream additional information to a hidden argument +named `result_listener` to explain the match result. 
For example, a +better definition of `IsDivisibleBy7` is: +``` +MATCHER(IsDivisibleBy7, "") { + if ((arg % 7) == 0) + return true; + + *result_listener << "the remainder is " << (arg % 7); + return false; +} +``` + +With this definition, the above assertion will give a better message: +``` + Value of: some_expression + Expected: is divisible by 7 + Actual: 27 (the remainder is 6) +``` + +You should let `MatchAndExplain()` print _any additional information_ +that can help a user understand the match result. Note that it should +explain why the match succeeds in case of a success (unless it's +obvious) - this is useful when the matcher is used inside +`Not()`. There is no need to print the argument value itself, as +Google Mock already prints it for you. + +**Notes:** + + 1. The type of the value being matched (`arg_type`) is determined by the context in which you use the matcher and is supplied to you by the compiler, so you don't need to worry about declaring it (nor can you). This allows the matcher to be polymorphic. For example, `IsDivisibleBy7()` can be used to match any type where the value of `(arg % 7) == 0` can be implicitly converted to a `bool`. In the `Bar(IsDivisibleBy7())` example above, if method `Bar()` takes an `int`, `arg_type` will be `int`; if it takes an `unsigned long`, `arg_type` will be `unsigned long`; and so on. + 1. Google Mock doesn't guarantee when or how many times a matcher will be invoked. Therefore the matcher logic must be _purely functional_ (i.e. it cannot have any side effect, and the result must not depend on anything other than the value being matched and the matcher parameters). This requirement must be satisfied no matter how you define the matcher (e.g. using one of the methods described in the following recipes). In particular, a matcher can never call a mock function, as that will affect the state of the mock object and Google Mock. 
+ +## Writing New Parameterized Matchers Quickly ## + +Sometimes you'll want to define a matcher that has parameters. For that you +can use the macro: +``` +MATCHER_P(name, param_name, description_string) { statements; } +``` +where the description string can be either `""` or a string expression +that references `negation` and `param_name`. + +For example: +``` +MATCHER_P(HasAbsoluteValue, value, "") { return abs(arg) == value; } +``` +will allow you to write: +``` + EXPECT_THAT(Blah("a"), HasAbsoluteValue(n)); +``` +which may lead to this message (assuming `n` is 10): +``` + Value of: Blah("a") + Expected: has absolute value 10 + Actual: -9 +``` + +Note that both the matcher description and its parameter are +printed, making the message human-friendly. + +In the matcher definition body, you can write `foo_type` to +reference the type of a parameter named `foo`. For example, in the +body of `MATCHER_P(HasAbsoluteValue, value)` above, you can write +`value_type` to refer to the type of `value`. + +Google Mock also provides `MATCHER_P2`, `MATCHER_P3`, ..., up to +`MATCHER_P10` to support multi-parameter matchers: +``` +MATCHER_Pk(name, param_1, ..., param_k, description_string) { statements; } +``` + +Please note that the custom description string is for a particular +**instance** of the matcher, where the parameters have been bound to +actual values. Therefore usually you'll want the parameter values to +be part of the description. Google Mock lets you do that by +referencing the matcher parameters in the description string +expression. + +For example, +``` + using ::testing::PrintToString; + MATCHER_P2(InClosedRange, low, hi, + std::string(negation ? "isn't" : "is") + " in range [" + + PrintToString(low) + ", " + PrintToString(hi) + "]") { + return low <= arg && arg <= hi; + } + ... 
+ EXPECT_THAT(3, InClosedRange(4, 6)); +``` +would generate a failure that contains the message: +``` + Expected: is in range [4, 6] +``` + +If you specify `""` as the description, the failure message will +contain the sequence of words in the matcher name followed by the +parameter values printed as a tuple. For example, +``` + MATCHER_P2(InClosedRange, low, hi, "") { ... } + ... + EXPECT_THAT(3, InClosedRange(4, 6)); +``` +would generate a failure that contains the text: +``` + Expected: in closed range (4, 6) +``` + +For the purpose of typing, you can view +``` +MATCHER_Pk(Foo, p1, ..., pk, description_string) { ... } +``` +as shorthand for +``` +template <typename p1_type, ..., typename pk_type> +FooMatcherPk<p1_type, ..., pk_type> +Foo(p1_type p1, ..., pk_type pk) { ... } +``` + +When you write `Foo(v1, ..., vk)`, the compiler infers the types of +the parameters `v1`, ..., and `vk` for you. If you are not happy with +the result of the type inference, you can specify the types by +explicitly instantiating the template, as in `Foo<long, bool>(5, false)`. +As said earlier, you don't get to (or need to) specify +`arg_type` as that's determined by the context in which the matcher +is used. + +You can assign the result of expression `Foo(p1, ..., pk)` to a +variable of type `FooMatcherPk<p1_type, ..., pk_type>`. This can be +useful when composing matchers. Matchers that don't have a parameter +or have only one parameter have special types: you can assign `Foo()` +to a `FooMatcher`-typed variable, and assign `Foo(p)` to a +`FooMatcherP<p_type>`-typed variable. + +While you can instantiate a matcher template with reference types, +passing the parameters by pointer usually makes your code more +readable. If, however, you still want to pass a parameter by +reference, be aware that in the failure message generated by the +matcher you will see the value of the referenced object but not its +address. + +You can overload matchers with different numbers of parameters: +``` +MATCHER_P(Blah, a, description_string_1) { ... } +MATCHER_P2(Blah, a, b, description_string_2) { ...
} +``` + +While it's tempting to always use the `MATCHER*` macros when defining +a new matcher, you should also consider implementing +`MatcherInterface` or using `MakePolymorphicMatcher()` instead (see +the recipes that follow), especially if you need to use the matcher a +lot. While these approaches require more work, they give you more +control on the types of the value being matched and the matcher +parameters, which in general leads to better compiler error messages +that pay off in the long run. They also allow overloading matchers +based on parameter types (as opposed to just based on the number of +parameters). + +## Writing New Monomorphic Matchers ## + +A matcher of argument type `T` implements +`::testing::MatcherInterface` and does two things: it tests whether a +value of type `T` matches the matcher, and can describe what kind of +values it matches. The latter ability is used for generating readable +error messages when expectations are violated. + +The interface looks like this: + +``` +class MatchResultListener { + public: + ... + // Streams x to the underlying ostream; does nothing if the ostream + // is NULL. + template + MatchResultListener& operator<<(const T& x); + + // Returns the underlying ostream. + ::std::ostream* stream(); +}; + +template +class MatcherInterface { + public: + virtual ~MatcherInterface(); + + // Returns true iff the matcher matches x; also explains the match + // result to 'listener'. + virtual bool MatchAndExplain(T x, MatchResultListener* listener) const = 0; + + // Describes this matcher to an ostream. + virtual void DescribeTo(::std::ostream* os) const = 0; + + // Describes the negation of this matcher to an ostream. 
+ virtual void DescribeNegationTo(::std::ostream* os) const; +}; +``` + +If you need a custom matcher but `Truly()` is not a good option (for +example, you may not be happy with the way `Truly(predicate)` +describes itself, or you may want your matcher to be polymorphic as +`Eq(value)` is), you can define a matcher to do whatever you want in +two steps: first implement the matcher interface, and then define a +factory function to create a matcher instance. The second step is not +strictly needed but it makes the syntax of using the matcher nicer. + +For example, you can define a matcher to test whether an `int` is +divisible by 7 and then use it like this: +``` +using ::testing::MakeMatcher; +using ::testing::Matcher; +using ::testing::MatcherInterface; +using ::testing::MatchResultListener; + +class DivisibleBy7Matcher : public MatcherInterface { + public: + virtual bool MatchAndExplain(int n, MatchResultListener* listener) const { + return (n % 7) == 0; + } + + virtual void DescribeTo(::std::ostream* os) const { + *os << "is divisible by 7"; + } + + virtual void DescribeNegationTo(::std::ostream* os) const { + *os << "is not divisible by 7"; + } +}; + +inline Matcher DivisibleBy7() { + return MakeMatcher(new DivisibleBy7Matcher); +} +... + + EXPECT_CALL(foo, Bar(DivisibleBy7())); +``` + +You may improve the matcher message by streaming additional +information to the `listener` argument in `MatchAndExplain()`: + +``` +class DivisibleBy7Matcher : public MatcherInterface { + public: + virtual bool MatchAndExplain(int n, + MatchResultListener* listener) const { + const int remainder = n % 7; + if (remainder != 0) { + *listener << "the remainder is " << remainder; + } + return remainder == 0; + } + ... 
+}; +``` + +Then, `EXPECT_THAT(x, DivisibleBy7());` may generate a message like this: +``` +Value of: x +Expected: is divisible by 7 + Actual: 23 (the remainder is 2) +``` + +## Writing New Polymorphic Matchers ## + +You've learned how to write your own matchers in the previous +recipe. Just one problem: a matcher created using `MakeMatcher()` only +works for one particular type of arguments. If you want a +_polymorphic_ matcher that works with arguments of several types (for +instance, `Eq(x)` can be used to match a `value` as long as `value` == +`x` compiles -- `value` and `x` don't have to share the same type), +you can learn the trick from `"gmock/gmock-matchers.h"` but it's a bit +involved. + +Fortunately, most of the time you can define a polymorphic matcher +easily with the help of `MakePolymorphicMatcher()`. Here's how you can +define `NotNull()` as an example: + +``` +using ::testing::MakePolymorphicMatcher; +using ::testing::MatchResultListener; +using ::testing::NotNull; +using ::testing::PolymorphicMatcher; + +class NotNullMatcher { + public: + // To implement a polymorphic matcher, first define a COPYABLE class + // that has three members MatchAndExplain(), DescribeTo(), and + // DescribeNegationTo(), like the following. + + // In this example, we want to use NotNull() with any pointer, so + // MatchAndExplain() accepts a pointer of any type as its first argument. + // In general, you can define MatchAndExplain() as an ordinary method or + // a method template, or even overload it. + template <typename T> + bool MatchAndExplain(T* p, + MatchResultListener* /* listener */) const { + return p != NULL; + } + + // Describes the property of a value matching this matcher. + void DescribeTo(::std::ostream* os) const { *os << "is not NULL"; } + + // Describes the property of a value NOT matching this matcher.
+ void DescribeNegationTo(::std::ostream* os) const { *os << "is NULL"; } +}; + +// To construct a polymorphic matcher, pass an instance of the class +// to MakePolymorphicMatcher(). Note the return type. +inline PolymorphicMatcher NotNull() { + return MakePolymorphicMatcher(NotNullMatcher()); +} +... + + EXPECT_CALL(foo, Bar(NotNull())); // The argument must be a non-NULL pointer. +``` + +**Note:** Your polymorphic matcher class does **not** need to inherit from +`MatcherInterface` or any other class, and its methods do **not** need +to be virtual. + +Like in a monomorphic matcher, you may explain the match result by +streaming additional information to the `listener` argument in +`MatchAndExplain()`. + +## Writing New Cardinalities ## + +A cardinality is used in `Times()` to tell Google Mock how many times +you expect a call to occur. It doesn't have to be exact. For example, +you can say `AtLeast(5)` or `Between(2, 4)`. + +If the built-in set of cardinalities doesn't suit you, you are free to +define your own by implementing the following interface (in namespace +`testing`): + +``` +class CardinalityInterface { + public: + virtual ~CardinalityInterface(); + + // Returns true iff call_count calls will satisfy this cardinality. + virtual bool IsSatisfiedByCallCount(int call_count) const = 0; + + // Returns true iff call_count calls will saturate this cardinality. + virtual bool IsSaturatedByCallCount(int call_count) const = 0; + + // Describes self to an ostream. 
+ virtual void DescribeTo(::std::ostream* os) const = 0; +}; +``` + +For example, to specify that a call must occur an even number of times, +you can write + +``` +using ::testing::Cardinality; +using ::testing::CardinalityInterface; +using ::testing::MakeCardinality; + +class EvenNumberCardinality : public CardinalityInterface { + public: + virtual bool IsSatisfiedByCallCount(int call_count) const { + return (call_count % 2) == 0; + } + + virtual bool IsSaturatedByCallCount(int call_count) const { + return false; + } + + virtual void DescribeTo(::std::ostream* os) const { + *os << "called even number of times"; + } +}; + +Cardinality EvenNumber() { + return MakeCardinality(new EvenNumberCardinality); +} +... + + EXPECT_CALL(foo, Bar(3)) + .Times(EvenNumber()); +``` + +## Writing New Actions Quickly ## + +If the built-in actions don't work for you, and you find it +inconvenient to use `Invoke()`, you can use a macro from the `ACTION*` +family to quickly define a new action that can be used in your code as +if it's a built-in action. + +By writing +``` +ACTION(name) { statements; } +``` +in a namespace scope (i.e. not inside a class or function), you will +define an action with the given name that executes the statements. +The value returned by `statements` will be used as the return value of +the action. Inside the statements, you can refer to the K-th +(0-based) argument of the mock function as `argK`. For example: +``` +ACTION(IncrementArg1) { return ++(*arg1); } +``` +allows you to write +``` +... WillOnce(IncrementArg1()); +``` + +Note that you don't need to specify the types of the mock function +arguments. Rest assured that your code is type-safe though: +you'll get a compiler error if `*arg1` doesn't support the `++` +operator, or if the type of `++(*arg1)` isn't compatible with the mock +function's return type. 
+ +Another example: +``` +ACTION(Foo) { + (*arg2)(5); + Blah(); + *arg1 = 0; + return arg0; +} +``` +defines an action `Foo()` that invokes argument #2 (a function pointer) +with 5, calls function `Blah()`, sets the value pointed to by argument +#1 to 0, and returns argument #0. + +For more convenience and flexibility, you can also use the following +pre-defined symbols in the body of `ACTION`: + +| `argK_type` | The type of the K-th (0-based) argument of the mock function | +|:------------|:-------------------------------------------------------------| +| `args` | All arguments of the mock function as a tuple | +| `args_type` | The type of all arguments of the mock function as a tuple | +| `return_type` | The return type of the mock function | +| `function_type` | The type of the mock function | + +For example, when using an `ACTION` as a stub action for mock function: +``` +int DoSomething(bool flag, int* ptr); +``` +we have: + +| **Pre-defined Symbol** | **Is Bound To** | +|:-----------------------|:----------------| +| `arg0` | the value of `flag` | +| `arg0_type` | the type `bool` | +| `arg1` | the value of `ptr` | +| `arg1_type` | the type `int*` | +| `args` | the tuple `(flag, ptr)` | +| `args_type` | the type `::testing::tuple` | +| `return_type` | the type `int` | +| `function_type` | the type `int(bool, int*)` | + +## Writing New Parameterized Actions Quickly ## + +Sometimes you'll want to parameterize an action you define. For that +we have another macro +``` +ACTION_P(name, param) { statements; } +``` + +For example, +``` +ACTION_P(Add, n) { return arg0 + n; } +``` +will allow you to write +``` +// Returns argument #0 + 5. +... WillOnce(Add(5)); +``` + +For convenience, we use the term _arguments_ for the values used to +invoke the mock function, and the term _parameters_ for the values +used to instantiate an action. + +Note that you don't need to provide the type of the parameter either. 
+If the parameter is named `param`, you can also use the +Google-Mock-defined symbol `param_type` to refer to the type of the +parameter as inferred by the compiler. For example, in the body of +`ACTION_P(Add, n)` above, you can write `n_type` for the type of `n`. + +Google Mock also provides `ACTION_P2`, `ACTION_P3`, etc. to support +multi-parameter actions. For example, +``` +ACTION_P2(ReturnDistanceTo, x, y) { + double dx = arg0 - x; + double dy = arg1 - y; + return sqrt(dx*dx + dy*dy); +} +``` +lets you write +``` +... WillOnce(ReturnDistanceTo(5.0, 26.5)); +``` + +You can view `ACTION` as a degenerate parameterized action where the +number of parameters is 0. + +You can also easily define actions overloaded on the number of parameters: +``` +ACTION_P(Plus, a) { ... } +ACTION_P2(Plus, a, b) { ... } +``` + +## Restricting the Type of an Argument or Parameter in an ACTION ## + +For maximum brevity and reusability, the `ACTION*` macros don't ask +you to provide the types of the mock function arguments and the action +parameters. Instead, we let the compiler infer the types for us. + +Sometimes, however, we may want to be more explicit about the types. +There are several tricks to do that. For example: +``` +ACTION(Foo) { + // Makes sure arg0 can be converted to int. + int n = arg0; + ... use n instead of arg0 here ... +} + +ACTION_P(Bar, param) { + // Makes sure the type of arg1 is const char*. + ::testing::StaticAssertTypeEq<const char*, arg1_type>(); + + // Makes sure param can be converted to bool. + bool flag = param; +} +``` +where `StaticAssertTypeEq` is a compile-time assertion in Google Test +that verifies two types are the same. + +## Writing New Action Templates Quickly ## + +Sometimes you want to give an action explicit template parameters that +cannot be inferred from its value parameters. `ACTION_TEMPLATE()` +supports that and can be viewed as an extension to `ACTION()` and +`ACTION_P*()`. 
+ +The syntax: +``` +ACTION_TEMPLATE(ActionName, + HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m), + AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; } +``` + +defines an action template that takes _m_ explicit template parameters +and _n_ value parameters, where _m_ is between 1 and 10, and _n_ is +between 0 and 10. `name_i` is the name of the i-th template +parameter, and `kind_i` specifies whether it's a `typename`, an +integral constant, or a template. `p_i` is the name of the i-th value +parameter. + +Example: +``` +// DuplicateArg(output) converts the k-th argument of the mock +// function to type T and copies it to *output. +ACTION_TEMPLATE(DuplicateArg, + // Note the comma between int and k: + HAS_2_TEMPLATE_PARAMS(int, k, typename, T), + AND_1_VALUE_PARAMS(output)) { + *output = T(::testing::get(args)); +} +``` + +To create an instance of an action template, write: +``` + ActionName(v1, ..., v_n) +``` +where the `t`s are the template arguments and the +`v`s are the value arguments. The value argument +types are inferred by the compiler. For example: +``` +using ::testing::_; +... + int n; + EXPECT_CALL(mock, Foo(_, _)) + .WillOnce(DuplicateArg<1, unsigned char>(&n)); +``` + +If you want to explicitly specify the value argument types, you can +provide additional template arguments: +``` + ActionName(v1, ..., v_n) +``` +where `u_i` is the desired type of `v_i`. + +`ACTION_TEMPLATE` and `ACTION`/`ACTION_P*` can be overloaded on the +number of value parameters, but not on the number of template +parameters. Without the restriction, the meaning of the following is +unclear: + +``` + OverloadedAction(x); +``` + +Are we using a single-template-parameter action where `bool` refers to +the type of `x`, or a two-template-parameter action where the compiler +is asked to infer the type of `x`? + +## Using the ACTION Object's Type ## + +If you are writing a function that returns an `ACTION` object, you'll +need to know its type. 
The type depends on the macro used to define +the action and the parameter types. The rule is relatively simple: + +| **Given Definition** | **Expression** | **Has Type** | +|:---------------------|:---------------|:-------------| +| `ACTION(Foo)` | `Foo()` | `FooAction` | +| `ACTION_TEMPLATE(Foo, HAS_m_TEMPLATE_PARAMS(...), AND_0_VALUE_PARAMS())` | `Foo<t1, ..., t_m>()` | `FooAction<t1, ..., t_m>` | +| `ACTION_P(Bar, param)` | `Bar(int_value)` | `BarActionP<int>` | +| `ACTION_TEMPLATE(Bar, HAS_m_TEMPLATE_PARAMS(...), AND_1_VALUE_PARAMS(p1))` | `Bar<t1, ..., t_m>(int_value)` | `BarActionP<t1, ..., t_m, int>` | +| `ACTION_P2(Baz, p1, p2)` | `Baz(bool_value, int_value)` | `BazActionP2<bool, int>` | +| `ACTION_TEMPLATE(Baz, HAS_m_TEMPLATE_PARAMS(...), AND_2_VALUE_PARAMS(p1, p2))`| `Baz<t1, ..., t_m>(bool_value, int_value)` | `BazActionP2<t1, ..., t_m, bool, int>` | +| ... | ... | ... | + +Note that we have to pick different suffixes (`Action`, `ActionP`, +`ActionP2`, etc.) for actions with different numbers of value +parameters, or the action definitions cannot be overloaded on the +number of them. + +## Writing New Monomorphic Actions ## + +While the `ACTION*` macros are very convenient, sometimes they are +inappropriate. For example, despite the tricks shown in the previous +recipes, they don't let you directly specify the types of the mock +function arguments and the action parameters, which in general leads +to unoptimized compiler error messages that can baffle unfamiliar +users. They also don't allow overloading actions based on parameter +types without jumping through some hoops. + +An alternative to the `ACTION*` macros is to implement +`::testing::ActionInterface<F>`, where `F` is the type of the mock +function in which the action will be used. For example: + +``` +template <typename F> class ActionInterface { + public: + virtual ~ActionInterface(); + + // Performs the action. Result is the return type of function type + // F, and ArgumentTuple is the tuple of arguments of F. 
+ // + // For example, if F is int(bool, const string&), then Result would + // be int, and ArgumentTuple would be ::testing::tuple<bool, const string&>. + virtual Result Perform(const ArgumentTuple& args) = 0; +}; + +using ::testing::_; +using ::testing::Action; +using ::testing::ActionInterface; +using ::testing::MakeAction; + +typedef int IncrementMethod(int*); + +class IncrementArgumentAction : public ActionInterface<IncrementMethod> { + public: + virtual int Perform(const ::testing::tuple<int*>& args) { + int* p = ::testing::get<0>(args); // Grabs the first argument. + return (*p)++; // Post-increments the pointee, not the pointer. + } +}; + +Action<IncrementMethod> IncrementArgument() { + return MakeAction(new IncrementArgumentAction); +} +... + + EXPECT_CALL(foo, Baz(_)) + .WillOnce(IncrementArgument()); + + int n = 5; + foo.Baz(&n); // Should return 5 and change n to 6. +``` + +## Writing New Polymorphic Actions ## + +The previous recipe showed you how to define your own action. This is +all good, except that you need to know the type of the function in +which the action will be used. Sometimes that can be a problem. For +example, if you want to use the action in functions with _different_ +types (e.g. like `Return()` and `SetArgPointee()`). + +If an action can be used in several types of mock functions, we say +it's _polymorphic_. The `MakePolymorphicAction()` function template +makes it easy to define such an action: + +``` +namespace testing { + +template <typename Impl> +PolymorphicAction<Impl> MakePolymorphicAction(const Impl& impl); + +} // namespace testing +``` + +As an example, let's define an action that returns the second argument +in the mock function's argument list. The first step is to define an +implementation class: + +``` +class ReturnSecondArgumentAction { + public: + template <typename Result, typename ArgumentTuple> + Result Perform(const ArgumentTuple& args) const { + // To get the i-th (0-based) argument, use ::testing::get<i>(args). + return ::testing::get<1>(args); + } +}; +``` + +This implementation class does _not_ need to inherit from any +particular class. 
What matters is that it must have a `Perform()` +method template. This method template takes the mock function's +arguments as a tuple in a **single** argument, and returns the result of +the action. It can be either `const` or not, but must be invokable +with exactly one template argument, which is the result type. In other +words, you must be able to call `Perform(args)` where `R` is the +mock function's return type and `args` is its arguments in a tuple. + +Next, we use `MakePolymorphicAction()` to turn an instance of the +implementation class into the polymorphic action we need. It will be +convenient to have a wrapper for this: + +``` +using ::testing::MakePolymorphicAction; +using ::testing::PolymorphicAction; + +PolymorphicAction ReturnSecondArgument() { + return MakePolymorphicAction(ReturnSecondArgumentAction()); +} +``` + +Now, you can use this polymorphic action the same way you use the +built-in ones: + +``` +using ::testing::_; + +class MockFoo : public Foo { + public: + MOCK_METHOD2(DoThis, int(bool flag, int n)); + MOCK_METHOD3(DoThat, string(int x, const char* str1, const char* str2)); +}; +... + + MockFoo foo; + EXPECT_CALL(foo, DoThis(_, _)) + .WillOnce(ReturnSecondArgument()); + EXPECT_CALL(foo, DoThat(_, _, _)) + .WillOnce(ReturnSecondArgument()); + ... + foo.DoThis(true, 5); // Will return 5. + foo.DoThat(1, "Hi", "Bye"); // Will return "Hi". +``` + +## Teaching Google Mock How to Print Your Values ## + +When an uninteresting or unexpected call occurs, Google Mock prints the +argument values and the stack trace to help you debug. Assertion +macros like `EXPECT_THAT` and `EXPECT_EQ` also print the values in +question when the assertion fails. Google Mock and Google Test do this using +Google Test's user-extensible value printer. + +This printer knows how to print built-in C++ types, native arrays, STL +containers, and any type that supports the `<<` operator. 
For other +types, it prints the raw bytes in the value and hopes that you the +user can figure it out. +[Google Test's advanced guide](../../googletest/docs/AdvancedGuide.md#teaching-google-test-how-to-print-your-values) +explains how to extend the printer to do a better job at +printing your particular type than to dump the bytes. diff --git a/tools/external/googletest/googlemock/docs/DesignDoc.md b/tools/external/googletest/googlemock/docs/DesignDoc.md new file mode 100644 index 00000000..3f515c3b --- /dev/null +++ b/tools/external/googletest/googlemock/docs/DesignDoc.md @@ -0,0 +1,280 @@ +This page discusses the design of new Google Mock features. + + + +# Macros for Defining Actions # + +## Problem ## + +Due to the lack of closures in C++, it currently requires some +non-trivial effort to define a custom action in Google Mock. For +example, suppose you want to "increment the value pointed to by the +second argument of the mock function and return it", you could write: + +``` +int IncrementArg1(Unused, int* p, Unused) { + return ++(*p); +} + +... WillOnce(Invoke(IncrementArg1)); +``` + +There are several things unsatisfactory about this approach: + + * Even though the action only cares about the second argument of the mock function, its definition needs to list other arguments as dummies. This is tedious. + * The defined action is usable only in mock functions that takes exactly 3 arguments - an unnecessary restriction. + * To use the action, one has to say `Invoke(IncrementArg1)`, which isn't as nice as `IncrementArg1()`. + +The latter two problems can be overcome using `MakePolymorphicAction()`, +but it requires much more boilerplate code: + +``` +class IncrementArg1Action { + public: + template + Result Perform(const ArgumentTuple& args) const { + return ++(*tr1::get<1>(args)); + } +}; + +PolymorphicAction IncrementArg1() { + return MakePolymorphicAction(IncrementArg1Action()); +} + +... 
WillOnce(IncrementArg1()); +``` + +Our goal is to allow defining custom actions with the least amount of +boiler-plate C++ requires. + +## Solution ## + +We propose to introduce a new macro: +``` +ACTION(name) { statements; } +``` + +Using this in a namespace scope will define an action with the given +name that executes the statements. Inside the statements, you can +refer to the K-th (0-based) argument of the mock function as `argK`. +For example: +``` +ACTION(IncrementArg1) { return ++(*arg1); } +``` +allows you to write +``` +... WillOnce(IncrementArg1()); +``` + +Note that you don't need to specify the types of the mock function +arguments, as brevity is a top design goal here. Rest assured that +your code is still type-safe though: you'll get a compiler error if +`*arg1` doesn't support the `++` operator, or if the type of +`++(*arg1)` isn't compatible with the mock function's return type. + +Another example: +``` +ACTION(Foo) { + (*arg2)(5); + Blah(); + *arg1 = 0; + return arg0; +} +``` +defines an action `Foo()` that invokes argument #2 (a function pointer) +with 5, calls function `Blah()`, sets the value pointed to by argument +#1 to 0, and returns argument #0. 
+ +For more convenience and flexibility, you can also use the following +pre-defined symbols in the body of `ACTION`: + +| `argK_type` | The type of the K-th (0-based) argument of the mock function | +|:------------|:-------------------------------------------------------------| +| `args` | All arguments of the mock function as a tuple | +| `args_type` | The type of all arguments of the mock function as a tuple | +| `return_type` | The return type of the mock function | +| `function_type` | The type of the mock function | + +For example, when using an `ACTION` as a stub action for mock function: +``` +int DoSomething(bool flag, int* ptr); +``` +we have: +| **Pre-defined Symbol** | **Is Bound To** | +|:-----------------------|:----------------| +| `arg0` | the value of `flag` | +| `arg0_type` | the type `bool` | +| `arg1` | the value of `ptr` | +| `arg1_type` | the type `int*` | +| `args` | the tuple `(flag, ptr)` | +| `args_type` | the type `std::tr1::tuple` | +| `return_type` | the type `int` | +| `function_type` | the type `int(bool, int*)` | + +## Parameterized actions ## + +Sometimes you'll want to parameterize the action. For that we propose +another macro +``` +ACTION_P(name, param) { statements; } +``` + +For example, +``` +ACTION_P(Add, n) { return arg0 + n; } +``` +will allow you to write +``` +// Returns argument #0 + 5. +... WillOnce(Add(5)); +``` + +For convenience, we use the term _arguments_ for the values used to +invoke the mock function, and the term _parameters_ for the values +used to instantiate an action. + +Note that you don't need to provide the type of the parameter either. +Suppose the parameter is named `param`, you can also use the +Google-Mock-defined symbol `param_type` to refer to the type of the +parameter as inferred by the compiler. + +We will also provide `ACTION_P2`, `ACTION_P3`, and etc to support +multi-parameter actions. 
For example, +``` +ACTION_P2(ReturnDistanceTo, x, y) { + double dx = arg0 - x; + double dy = arg1 - y; + return sqrt(dx*dx + dy*dy); +} +``` +lets you write +``` +... WillOnce(ReturnDistanceTo(5.0, 26.5)); +``` + +You can view `ACTION` as a degenerated parameterized action where the +number of parameters is 0. + +## Advanced Usages ## + +### Overloading Actions ### + +You can easily define actions overloaded on the number of parameters: +``` +ACTION_P(Plus, a) { ... } +ACTION_P2(Plus, a, b) { ... } +``` + +### Restricting the Type of an Argument or Parameter ### + +For maximum brevity and reusability, the `ACTION*` macros don't let +you specify the types of the mock function arguments and the action +parameters. Instead, we let the compiler infer the types for us. + +Sometimes, however, we may want to be more explicit about the types. +There are several tricks to do that. For example: +``` +ACTION(Foo) { + // Makes sure arg0 can be converted to int. + int n = arg0; + ... use n instead of arg0 here ... +} + +ACTION_P(Bar, param) { + // Makes sure the type of arg1 is const char*. + ::testing::StaticAssertTypeEq(); + + // Makes sure param can be converted to bool. + bool flag = param; +} +``` +where `StaticAssertTypeEq` is a compile-time assertion we plan to add to +Google Test (the name is chosen to match `static_assert` in C++0x). + +### Using the ACTION Object's Type ### + +If you are writing a function that returns an `ACTION` object, you'll +need to know its type. The type depends on the macro used to define +the action and the parameter types. The rule is relatively simple: +| **Given Definition** | **Expression** | **Has Type** | +|:---------------------|:---------------|:-------------| +| `ACTION(Foo)` | `Foo()` | `FooAction` | +| `ACTION_P(Bar, param)` | `Bar(int_value)` | `BarActionP` | +| `ACTION_P2(Baz, p1, p2)` | `Baz(bool_value, int_value)` | `BazActionP2` | +| ... | ... | ... 
| + +Note that we have to pick different suffixes (`Action`, `ActionP`, +`ActionP2`, and etc) for actions with different numbers of parameters, +or the action definitions cannot be overloaded on the number of +parameters. + +## When to Use ## + +While the new macros are very convenient, please also consider other +means of implementing actions (e.g. via `ActionInterface` or +`MakePolymorphicAction()`), especially if you need to use the defined +action a lot. While the other approaches require more work, they give +you more control on the types of the mock function arguments and the +action parameters, which in general leads to better compiler error +messages that pay off in the long run. They also allow overloading +actions based on parameter types, as opposed to just the number of +parameters. + +## Related Work ## + +As you may have realized, the `ACTION*` macros resemble closures (also +known as lambda expressions or anonymous functions). Indeed, both of +them seek to lower the syntactic overhead for defining a function. + +C++0x will support lambdas, but they are not part of C++ right now. +Some non-standard libraries (most notably BLL or Boost Lambda Library) +try to alleviate this problem. However, they are not a good choice +for defining actions as: + + * They are non-standard and not widely installed. Google Mock only depends on standard libraries and `tr1::tuple`, which is part of the new C++ standard and comes with gcc 4+. We want to keep it that way. + * They are not trivial to learn. + * They will become obsolete when C++0x's lambda feature is widely supported. We don't want to make our users use a dying library. + * Since they are based on operators, they are rather ad hoc: you cannot use statements, and you cannot pass the lambda arguments to a function, for example. + * They have subtle semantics that easily confuses new users. 
For example, in expression `_1++ + foo++`, `foo` will be incremented only once where the expression is evaluated, while `_1` will be incremented every time the unnamed function is invoked. This is far from intuitive. + +`ACTION*` avoid all these problems. + +## Future Improvements ## + +There may be a need for composing `ACTION*` definitions (i.e. invoking +another `ACTION` inside the definition of one `ACTION*`). We are not +sure we want it yet, as one can get a similar effect by putting +`ACTION` definitions in function templates and composing the function +templates. We'll revisit this based on user feedback. + +The reason we don't allow `ACTION*()` inside a function body is that +the current C++ standard doesn't allow function-local types to be used +to instantiate templates. The upcoming C++0x standard will lift this +restriction. Once this feature is widely supported by compilers, we +can revisit the implementation and add support for using `ACTION*()` +inside a function. + +C++0x will also support lambda expressions. When they become +available, we may want to support using lambdas as actions. + +# Macros for Defining Matchers # + +Once the macros for defining actions are implemented, we plan to do +the same for matchers: + +``` +MATCHER(name) { statements; } +``` + +where you can refer to the value being matched as `arg`. For example, +given: + +``` +MATCHER(IsPositive) { return arg > 0; } +``` + +you can use `IsPositive()` as a matcher that matches a value iff it is +greater than 0. + +We will also add `MATCHER_P`, `MATCHER_P2`, and etc for parameterized +matchers. 
\ No newline at end of file diff --git a/tools/external/googletest/googlemock/docs/DevGuide.md b/tools/external/googletest/googlemock/docs/DevGuide.md new file mode 100644 index 00000000..cae07e70 --- /dev/null +++ b/tools/external/googletest/googlemock/docs/DevGuide.md @@ -0,0 +1,132 @@ + + +If you are interested in understanding the internals of Google Mock, +building from source, or contributing ideas or modifications to the +project, then this document is for you. + +# Introduction # + +First, let's give you some background of the project. + +## Licensing ## + +All Google Mock source and pre-built packages are provided under the [New BSD License](http://www.opensource.org/licenses/bsd-license.php). + +## The Google Mock Community ## + +The Google Mock community exists primarily through the [discussion group](http://groups.google.com/group/googlemock), the +[issue tracker](https://github.com/google/googletest/issues) and, to a lesser extent, the [source control repository](../). You are definitely encouraged to contribute to the +discussion and you can also help us to keep the effectiveness of the +group high by following and promoting the guidelines listed here. + +### Please Be Friendly ### + +Showing courtesy and respect to others is a vital part of the Google +culture, and we strongly encourage everyone participating in Google +Mock development to join us in accepting nothing less. Of course, +being courteous is not the same as failing to constructively disagree +with each other, but it does mean that we should be respectful of each +other when enumerating the 42 technical reasons that a particular +proposal may not be the best choice. There's never a reason to be +antagonistic or dismissive toward anyone who is sincerely trying to +contribute to a discussion. + +Sure, C++ testing is serious business and all that, but it's also +a lot of fun. Let's keep it that way. Let's strive to be one of the +friendliest communities in all of open source. 
+ +### Where to Discuss Google Mock ### + +As always, discuss Google Mock in the official [Google C++ Mocking Framework discussion group](http://groups.google.com/group/googlemock). You don't have to actually submit +code in order to sign up. Your participation itself is a valuable +contribution. + +# Working with the Code # + +If you want to get your hands dirty with the code inside Google Mock, +this is the section for you. + +## Checking Out the Source from Subversion ## + +Checking out the Google Mock source is most useful if you plan to +tweak it yourself. You check out the source for Google Mock using a +[Subversion](http://subversion.tigris.org/) client as you would for any +other project hosted on Google Code. Please see the instructions on +the [source code access page](../) for how to do it. + +## Compiling from Source ## + +Once you check out the code, you can find instructions on how to +compile it in the [README](../README.md) file. + +## Testing ## + +A mocking framework is of no use unless it is itself thoroughly tested. +Tests should be written for any new code, and changes should be +verified to not break existing tests before they are submitted for +review. To perform the tests, follow the instructions in [README](../README.md) and +verify that there are no failures. + +# Contributing Code # + +We are excited that Google Mock is now open source, and hope to get +great patches from the community. Before you fire up your favorite IDE +and begin hammering away at that new feature, though, please take the +time to read this section and understand the process. While it seems +rigorous, we want to keep a high standard of quality in the code +base. + +## Contributor License Agreements ## + +You must sign a Contributor License Agreement (CLA) before we can +accept any code. The CLA protects you and us. 
+ + * If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html). + * If you work for a company that wants to allow you to contribute your work to Google Mock, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html). + +Follow either of the two links above to access the appropriate CLA and +instructions for how to sign and return it. + +## Coding Style ## + +To keep the source consistent, readable, diffable and easy to merge, +we use a fairly rigid coding style, as defined by the [google-styleguide](https://github.com/google/styleguide) project. All patches will be expected +to conform to the style outlined [here](https://google.github.io/styleguide/cppguide.html). + +## Submitting Patches ## + +Please do submit code. Here's what you need to do: + + 1. Normally you should make your change against the SVN trunk instead of a branch or a tag, as the latter two are for release control and should be treated mostly as read-only. + 1. Decide which code you want to submit. A submission should be a set of changes that addresses one issue in the [Google Mock issue tracker](https://github.com/google/googletest/issues). Please don't mix more than one logical change per submittal, because it makes the history hard to follow. If you want to make a change that doesn't have a corresponding issue in the issue tracker, please create one. + 1. Also, coordinate with team members that are listed on the issue in question. This ensures that work isn't being duplicated and communicating your plan early also generally leads to better patches. + 1. Ensure that your code adheres to the [Google Mock source code style](#Coding_Style.md). + 1. Ensure that there are unit tests for your code. + 1. Sign a Contributor License Agreement. + 1. Create a patch file using `svn diff`. + 1. 
We use [Rietveld](http://codereview.appspot.com/) to do web-based code reviews. You can read about the tool [here](https://github.com/rietveld-codereview/rietveld/wiki). When you are ready, upload your patch via Rietveld and notify `googlemock@googlegroups.com` to review it. There are several ways to upload the patch. We recommend using the [upload\_gmock.py](../scripts/upload_gmock.py) script, which you can find in the `scripts/` folder in the SVN trunk. + +## Google Mock Committers ## + +The current members of the Google Mock engineering team are the only +committers at present. In the great tradition of eating one's own +dogfood, we will be requiring each new Google Mock engineering team +member to earn the right to become a committer by following the +procedures in this document, writing consistently great code, and +demonstrating repeatedly that he or she truly gets the zen of Google +Mock. + +# Release Process # + +We follow the typical release process for Subversion-based projects: + + 1. A release branch named `release-X.Y` is created. + 1. Bugs are fixed and features are added in trunk; those individual patches are merged into the release branch until it's stable. + 1. An individual point release (the `Z` in `X.Y.Z`) is made by creating a tag from the branch. + 1. Repeat steps 2 and 3 throughout one release cycle (as determined by features or time). + 1. Go back to step 1 to create another release branch and so on. + + +--- + +This page is based on the [Making GWT Better](http://code.google.com/webtoolkit/makinggwtbetter.html) guide from the [Google Web Toolkit](http://code.google.com/webtoolkit/) project. Except as otherwise [noted](http://code.google.com/policies.html#restrictions), the content of this page is licensed under the [Creative Commons Attribution 2.5 License](http://creativecommons.org/licenses/by/2.5/). 
diff --git a/tools/external/googletest/googlemock/docs/Documentation.md b/tools/external/googletest/googlemock/docs/Documentation.md new file mode 100644 index 00000000..a0311871 --- /dev/null +++ b/tools/external/googletest/googlemock/docs/Documentation.md @@ -0,0 +1,15 @@ +This page lists all documentation markdown files for Google Mock **(the +current git version)** +-- **if you use a former version of Google Mock, please read the +documentation for that specific version instead (e.g. by checking out +the respective git branch/tag).** + + * [ForDummies](ForDummies.md) -- start here if you are new to Google Mock. + * [CheatSheet](CheatSheet.md) -- a quick reference. + * [CookBook](CookBook.md) -- recipes for doing various tasks using Google Mock. + * [FrequentlyAskedQuestions](FrequentlyAskedQuestions.md) -- check here before asking a question on the mailing list. + +To contribute code to Google Mock, read: + + * [DevGuide](DevGuide.md) -- read this _before_ writing your first patch. + * [Pump Manual](../../googletest/docs/PumpManual.md) -- how we generate some of Google Mock's source files. diff --git a/tools/external/googletest/googlemock/docs/ForDummies.md b/tools/external/googletest/googlemock/docs/ForDummies.md new file mode 100644 index 00000000..76910569 --- /dev/null +++ b/tools/external/googletest/googlemock/docs/ForDummies.md @@ -0,0 +1,447 @@ + + +(**Note:** If you get compiler errors that you don't understand, be sure to consult [Google Mock Doctor](FrequentlyAskedQuestions.md#how-am-i-supposed-to-make-sense-of-these-horrible-template-errors).) + +# What Is Google C++ Mocking Framework? # +When you write a prototype or test, often it's not feasible or wise to rely on real objects entirely. A **mock object** implements the same interface as a real object (so it can be used as one), but lets you specify at run time how it will be used and what it should do (which methods will be called? in which order? how many times? with what arguments? 
what will they return? etc). + +**Note:** It is easy to confuse the term _fake objects_ with mock objects. Fakes and mocks actually mean very different things in the Test-Driven Development (TDD) community: + + * **Fake** objects have working implementations, but usually take some shortcut (perhaps to make the operations less expensive), which makes them not suitable for production. An in-memory file system would be an example of a fake. + * **Mocks** are objects pre-programmed with _expectations_, which form a specification of the calls they are expected to receive. + +If all this seems too abstract for you, don't worry - the most important thing to remember is that a mock allows you to check the _interaction_ between itself and code that uses it. The difference between fakes and mocks will become much clearer once you start to use mocks. + +**Google C++ Mocking Framework** (or **Google Mock** for short) is a library (sometimes we also call it a "framework" to make it sound cool) for creating mock classes and using them. It does to C++ what [jMock](http://www.jmock.org/) and [EasyMock](http://www.easymock.org/) do to Java. + +Using Google Mock involves three basic steps: + + 1. Use some simple macros to describe the interface you want to mock, and they will expand to the implementation of your mock class; + 1. Create some mock objects and specify its expectations and behavior using an intuitive syntax; + 1. Exercise code that uses the mock objects. Google Mock will catch any violation of the expectations as soon as it arises. + +# Why Google Mock? # +While mock objects help you remove unnecessary dependencies in tests and make them fast and reliable, using mocks manually in C++ is _hard_: + + * Someone has to implement the mocks. The job is usually tedious and error-prone. No wonder people go great distances to avoid it. + * The quality of those manually written mocks is a bit, uh, unpredictable. 
You may see some really polished ones, but you may also see some that were hacked up in a hurry and have all sorts of ad-hoc restrictions. + * The knowledge you gained from using one mock doesn't transfer to the next. + +In contrast, Java and Python programmers have some fine mock frameworks, which automate the creation of mocks. As a result, mocking is a proven effective technique and widely adopted practice in those communities. Having the right tool absolutely makes the difference. + +Google Mock was built to help C++ programmers. It was inspired by [jMock](http://www.jmock.org/) and [EasyMock](http://www.easymock.org/), but designed with C++'s specifics in mind. It is your friend if any of the following problems is bothering you: + + * You are stuck with a sub-optimal design and wish you had done more prototyping before it was too late, but prototyping in C++ is by no means "rapid". + * Your tests are slow as they depend on too many libraries or use expensive resources (e.g. a database). + * Your tests are brittle as some resources they use are unreliable (e.g. the network). + * You want to test how your code handles a failure (e.g. a file checksum error), but it's not easy to cause one. + * You need to make sure that your module interacts with other modules in the right way, but it's hard to observe the interaction; therefore you resort to observing the side effects at the end of the action, which is awkward at best. + * You want to "mock out" your dependencies, except that they don't have mock implementations yet; and, frankly, you aren't thrilled by some of those hand-written mocks. + +We encourage you to use Google Mock as: + + * a _design_ tool, for it lets you experiment with your interface design early and often. More iterations lead to better designs! + * a _testing_ tool to cut your tests' outbound dependencies and probe the interaction between your module and its collaborators. + +# Getting Started # +Using Google Mock is easy! 
Inside your C++ source file, just `#include` `"gtest/gtest.h"` and `"gmock/gmock.h"`, and you are ready to go. + +# A Case for Mock Turtles # +Let's look at an example. Suppose you are developing a graphics program that relies on a LOGO-like API for drawing. How would you test that it does the right thing? Well, you can run it and compare the screen with a golden screen snapshot, but let's admit it: tests like this are expensive to run and fragile (What if you just upgraded to a shiny new graphics card that has better anti-aliasing? Suddenly you have to update all your golden images.). It would be too painful if all your tests are like this. Fortunately, you learned about Dependency Injection and know the right thing to do: instead of having your application talk to the drawing API directly, wrap the API in an interface (say, `Turtle`) and code to that interface: + +``` +class Turtle { + ... + virtual ~Turtle() {} + virtual void PenUp() = 0; + virtual void PenDown() = 0; + virtual void Forward(int distance) = 0; + virtual void Turn(int degrees) = 0; + virtual void GoTo(int x, int y) = 0; + virtual int GetX() const = 0; + virtual int GetY() const = 0; +}; +``` + +(Note that the destructor of `Turtle` **must** be virtual, as is the case for **all** classes you intend to inherit from - otherwise the destructor of the derived class will not be called when you delete an object through a base pointer, and you'll get corrupted program states like memory leaks.) + +You can control whether the turtle's movement will leave a trace using `PenUp()` and `PenDown()`, and control its movement using `Forward()`, `Turn()`, and `GoTo()`. Finally, `GetX()` and `GetY()` tell you the current position of the turtle. + +Your program will normally use a real implementation of this interface. In tests, you can use a mock implementation instead. This allows you to easily check what drawing primitives your program is calling, with what arguments, and in which order. 
Tests written this way are much more robust (they won't break because your new machine does anti-aliasing differently), easier to read and maintain (the intent of a test is expressed in the code, not in some binary images), and run _much, much faster_. + +# Writing the Mock Class # +If you are lucky, the mocks you need to use have already been implemented by some nice people. If, however, you find yourself in the position to write a mock class, relax - Google Mock turns this task into a fun game! (Well, almost.) + +## How to Define It ## +Using the `Turtle` interface as example, here are the simple steps you need to follow: + + 1. Derive a class `MockTurtle` from `Turtle`. + 1. Take a _virtual_ function of `Turtle` (while it's possible to [mock non-virtual methods using templates](CookBook.md#mocking-nonvirtual-methods), it's much more involved). Count how many arguments it has. + 1. In the `public:` section of the child class, write `MOCK_METHODn();` (or `MOCK_CONST_METHODn();` if you are mocking a `const` method), where `n` is the number of the arguments; if you counted wrong, shame on you, and a compiler error will tell you so. + 1. Now comes the fun part: you take the function signature, cut-and-paste the _function name_ as the _first_ argument to the macro, and leave what's left as the _second_ argument (in case you're curious, this is the _type of the function_). + 1. Repeat until all virtual functions you want to mock are done. + +After the process, you should have something like: + +``` +#include "gmock/gmock.h" // Brings in Google Mock. +class MockTurtle : public Turtle { + public: + ... 
+ MOCK_METHOD0(PenUp, void()); + MOCK_METHOD0(PenDown, void()); + MOCK_METHOD1(Forward, void(int distance)); + MOCK_METHOD1(Turn, void(int degrees)); + MOCK_METHOD2(GoTo, void(int x, int y)); + MOCK_CONST_METHOD0(GetX, int()); + MOCK_CONST_METHOD0(GetY, int()); +}; +``` + +You don't need to define these mock methods somewhere else - the `MOCK_METHOD*` macros will generate the definitions for you. It's that simple! Once you get the hang of it, you can pump out mock classes faster than your source-control system can handle your check-ins. + +**Tip:** If even this is too much work for you, you'll find the +`gmock_gen.py` tool in Google Mock's `scripts/generator/` directory (courtesy of the [cppclean](http://code.google.com/p/cppclean/) project) useful. This command-line +tool requires that you have Python 2.4 installed. You give it a C++ file and the name of an abstract class defined in it, +and it will print the definition of the mock class for you. Due to the +complexity of the C++ language, this script may not always work, but +it can be quite handy when it does. For more details, read the [user documentation](../scripts/generator/README). + +## Where to Put It ## +When you define a mock class, you need to decide where to put its definition. Some people put it in a `*_test.cc`. This is fine when the interface being mocked (say, `Foo`) is owned by the same person or team. Otherwise, when the owner of `Foo` changes it, your test could break. (You can't really expect `Foo`'s maintainer to fix every test that uses `Foo`, can you?) + +So, the rule of thumb is: if you need to mock `Foo` and it's owned by others, define the mock class in `Foo`'s package (better, in a `testing` sub-package such that you can clearly separate production code and testing utilities), and put it in a `mock_foo.h`. Then everyone can reference `mock_foo.h` from their tests. 
If `Foo` ever changes, there is only one copy of `MockFoo` to change, and only tests that depend on the changed methods need to be fixed. + +Another way to do it: you can introduce a thin layer `FooAdaptor` on top of `Foo` and code to this new interface. Since you own `FooAdaptor`, you can absorb changes in `Foo` much more easily. While this is more work initially, carefully choosing the adaptor interface can make your code easier to write and more readable (a net win in the long run), as you can choose `FooAdaptor` to fit your specific domain much better than `Foo` does. + +# Using Mocks in Tests # +Once you have a mock class, using it is easy. The typical work flow is: + + 1. Import the Google Mock names from the `testing` namespace such that you can use them unqualified (You only have to do it once per file. Remember that namespaces are a good idea and good for your health.). + 1. Create some mock objects. + 1. Specify your expectations on them (How many times will a method be called? With what arguments? What should it do? etc.). + 1. Exercise some code that uses the mocks; optionally, check the result using Google Test assertions. If a mock method is called more than expected or with wrong arguments, you'll get an error immediately. + 1. When a mock is destructed, Google Mock will automatically check whether all expectations on it have been satisfied. + +Here's an example: + +``` +#include "path/to/mock-turtle.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +using ::testing::AtLeast; // #1 + +TEST(PainterTest, CanDrawSomething) { + MockTurtle turtle; // #2 + EXPECT_CALL(turtle, PenDown()) // #3 + .Times(AtLeast(1)); + + Painter painter(&turtle); // #4 + + EXPECT_TRUE(painter.DrawCircle(0, 0, 10)); +} // #5 + +int main(int argc, char** argv) { + // The following line must be executed to initialize Google Mock + // (and Google Test) before running the tests. 
+ ::testing::InitGoogleMock(&argc, argv); + return RUN_ALL_TESTS(); +} +``` + +As you might have guessed, this test checks that `PenDown()` is called at least once. If the `painter` object didn't call this method, your test will fail with a message like this: + +``` +path/to/my_test.cc:119: Failure +Actual function call count doesn't match this expectation: +Actually: never called; +Expected: called at least once. +``` + +**Tip 1:** If you run the test from an Emacs buffer, you can hit `` on the line number displayed in the error message to jump right to the failed expectation. + +**Tip 2:** If your mock objects are never deleted, the final verification won't happen. Therefore it's a good idea to use a heap leak checker in your tests when you allocate mocks on the heap. + +**Important note:** Google Mock requires expectations to be set **before** the mock functions are called, otherwise the behavior is **undefined**. In particular, you mustn't interleave `EXPECT_CALL()`s and calls to the mock functions. + +This means `EXPECT_CALL()` should be read as expecting that a call will occur _in the future_, not that a call has occurred. Why does Google Mock work like that? Well, specifying the expectation beforehand allows Google Mock to report a violation as soon as it arises, when the context (stack trace, etc) is still available. This makes debugging much easier. + +Admittedly, this test is contrived and doesn't do much. You can easily achieve the same effect without using Google Mock. However, as we shall reveal soon, Google Mock allows you to do _much more_ with the mocks. + +## Using Google Mock with Any Testing Framework ## +If you want to use something other than Google Test (e.g. 
[CppUnit](http://sourceforge.net/projects/cppunit/) or +[CxxTest](http://cxxtest.tigris.org/)) as your testing framework, just change the `main()` function in the previous section to: +``` +int main(int argc, char** argv) { + // The following line causes Google Mock to throw an exception on failure, + // which will be interpreted by your testing framework as a test failure. + ::testing::GTEST_FLAG(throw_on_failure) = true; + ::testing::InitGoogleMock(&argc, argv); + ... whatever your testing framework requires ... +} +``` + +This approach has a catch: it makes Google Mock throw an exception +from a mock object's destructor sometimes. With some compilers, this +sometimes causes the test program to crash. You'll still be able to +notice that the test has failed, but it's not a graceful failure. + +A better solution is to use Google Test's +[event listener API](../../googletest/docs/AdvancedGuide.md#extending-google-test-by-handling-test-events) +to report a test failure to your testing framework properly. You'll need to +implement the `OnTestPartResult()` method of the event listener interface, but it +should be straightforward. + +If this turns out to be too much work, we suggest that you stick with +Google Test, which works with Google Mock seamlessly (in fact, it is +technically part of Google Mock.). If there is a reason that you +cannot use Google Test, please let us know. + +# Setting Expectations # +The key to using a mock object successfully is to set the _right expectations_ on it. If you set the expectations too strict, your test will fail as the result of unrelated changes. If you set them too loose, bugs can slip through. You want to do it just right such that your test can catch exactly the kind of bugs you intend it to catch. Google Mock provides the necessary means for you to do it "just right." + +## General Syntax ## +In Google Mock we use the `EXPECT_CALL()` macro to set an expectation on a mock method. 
The general syntax is: + +``` +EXPECT_CALL(mock_object, method(matchers)) + .Times(cardinality) + .WillOnce(action) + .WillRepeatedly(action); +``` + +The macro has two arguments: first the mock object, and then the method and its arguments. Note that the two are separated by a comma (`,`), not a period (`.`). (Why using a comma? The answer is that it was necessary for technical reasons.) + +The macro can be followed by some optional _clauses_ that provide more information about the expectation. We'll discuss how each clause works in the coming sections. + +This syntax is designed to make an expectation read like English. For example, you can probably guess that + +``` +using ::testing::Return; +... +EXPECT_CALL(turtle, GetX()) + .Times(5) + .WillOnce(Return(100)) + .WillOnce(Return(150)) + .WillRepeatedly(Return(200)); +``` + +says that the `turtle` object's `GetX()` method will be called five times, it will return 100 the first time, 150 the second time, and then 200 every time. Some people like to call this style of syntax a Domain-Specific Language (DSL). + +**Note:** Why do we use a macro to do this? It serves two purposes: first it makes expectations easily identifiable (either by `grep` or by a human reader), and second it allows Google Mock to include the source file location of a failed expectation in messages, making debugging easier. + +## Matchers: What Arguments Do We Expect? ## +When a mock function takes arguments, we must specify what arguments we are expecting; for example: + +``` +// Expects the turtle to move forward by 100 units. +EXPECT_CALL(turtle, Forward(100)); +``` + +Sometimes you may not want to be too specific (Remember that talk about tests being too rigid? Over specification leads to brittle tests and obscures the intent of tests. Therefore we encourage you to specify only what's necessary - no more, no less.). 
If you care to check that `Forward()` will be called but aren't interested in its actual argument, write `_` as the argument, which means "anything goes": + +``` +using ::testing::_; +... +// Expects the turtle to move forward. +EXPECT_CALL(turtle, Forward(_)); +``` + +`_` is an instance of what we call **matchers**. A matcher is like a predicate and can test whether an argument is what we'd expect. You can use a matcher inside `EXPECT_CALL()` wherever a function argument is expected. + +A list of built-in matchers can be found in the [CheatSheet](CheatSheet.md). For example, here's the `Ge` (greater than or equal) matcher: + +``` +using ::testing::Ge; +... +EXPECT_CALL(turtle, Forward(Ge(100))); +``` + +This checks that the turtle will be told to go forward by at least 100 units. + +## Cardinalities: How Many Times Will It Be Called? ## +The first clause we can specify following an `EXPECT_CALL()` is `Times()`. We call its argument a **cardinality** as it tells _how many times_ the call should occur. It allows us to repeat an expectation many times without actually writing it as many times. More importantly, a cardinality can be "fuzzy", just like a matcher can be. This allows a user to express the intent of a test exactly. + +An interesting special case is when we say `Times(0)`. You may have guessed - it means that the function shouldn't be called with the given arguments at all, and Google Mock will report a Google Test failure whenever the function is (wrongfully) called. + +We've seen `AtLeast(n)` as an example of fuzzy cardinalities earlier. For the list of built-in cardinalities you can use, see the [CheatSheet](CheatSheet.md). + +The `Times()` clause can be omitted. **If you omit `Times()`, Google Mock will infer the cardinality for you.** The rules are easy to remember: + + * If **neither** `WillOnce()` **nor** `WillRepeatedly()` is in the `EXPECT_CALL()`, the inferred cardinality is `Times(1)`. 
+ * If there are `n WillOnce()`'s but **no** `WillRepeatedly()`, where `n` >= 1, the cardinality is `Times(n)`. + * If there are `n WillOnce()`'s and **one** `WillRepeatedly()`, where `n` >= 0, the cardinality is `Times(AtLeast(n))`. + +**Quick quiz:** what do you think will happen if a function is expected to be called twice but actually called four times? + +## Actions: What Should It Do? ## +Remember that a mock object doesn't really have a working implementation? We as users have to tell it what to do when a method is invoked. This is easy in Google Mock. + +First, if the return type of a mock function is a built-in type or a pointer, the function has a **default action** (a `void` function will just return, a `bool` function will return `false`, and other functions will return 0). In addition, in C++ 11 and above, a mock function whose return type is default-constructible (i.e. has a default constructor) has a default action of returning a default-constructed value. If you don't say anything, this behavior will be used. + +Second, if a mock function doesn't have a default action, or the default action doesn't suit you, you can specify the action to be taken each time the expectation matches using a series of `WillOnce()` clauses followed by an optional `WillRepeatedly()`. For example, + +``` +using ::testing::Return; +... +EXPECT_CALL(turtle, GetX()) + .WillOnce(Return(100)) + .WillOnce(Return(200)) + .WillOnce(Return(300)); +``` + +This says that `turtle.GetX()` will be called _exactly three times_ (Google Mock inferred this from how many `WillOnce()` clauses we've written, since we didn't explicitly write `Times()`), and will return 100, 200, and 300 respectively. + +``` +using ::testing::Return; +... 
+EXPECT_CALL(turtle, GetY()) + .WillOnce(Return(100)) + .WillOnce(Return(200)) + .WillRepeatedly(Return(300)); +``` + +says that `turtle.GetY()` will be called _at least twice_ (Google Mock knows this as we've written two `WillOnce()` clauses and a `WillRepeatedly()` while having no explicit `Times()`), will return 100 the first time, 200 the second time, and 300 from the third time on. + +Of course, if you explicitly write a `Times()`, Google Mock will not try to infer the cardinality itself. What if the number you specified is larger than there are `WillOnce()` clauses? Well, after all `WillOnce()`s are used up, Google Mock will do the _default_ action for the function every time (unless, of course, you have a `WillRepeatedly()`.). + +What can we do inside `WillOnce()` besides `Return()`? You can return a reference using `ReturnRef(variable)`, or invoke a pre-defined function, among [others](CheatSheet.md#actions). + +**Important note:** The `EXPECT_CALL()` statement evaluates the action clause only once, even though the action may be performed many times. Therefore you must be careful about side effects. The following may not do what you want: + +``` +int n = 100; +EXPECT_CALL(turtle, GetX()) +.Times(4) +.WillRepeatedly(Return(n++)); +``` + +Instead of returning 100, 101, 102, ..., consecutively, this mock function will always return 100 as `n++` is only evaluated once. Similarly, `Return(new Foo)` will create a new `Foo` object when the `EXPECT_CALL()` is executed, and will return the same pointer every time. If you want the side effect to happen every time, you need to define a custom action, which we'll teach in the [CookBook](CookBook.md). + +Time for another quiz! What do you think the following means? + +``` +using ::testing::Return; +... +EXPECT_CALL(turtle, GetY()) +.Times(4) +.WillOnce(Return(100)); +``` + +Obviously `turtle.GetY()` is expected to be called four times. But if you think it will return 100 every time, think twice! 
Remember that one `WillOnce()` clause will be consumed each time the function is invoked and the default action will be taken afterwards. So the right answer is that `turtle.GetY()` will return 100 the first time, but **return 0 from the second time on**, as returning 0 is the default action for `int` functions. + +## Using Multiple Expectations ## +So far we've only shown examples where you have a single expectation. More realistically, you're going to specify expectations on multiple mock methods, which may be from multiple mock objects. + +By default, when a mock method is invoked, Google Mock will search the expectations in the **reverse order** they are defined, and stop when an active expectation that matches the arguments is found (you can think of it as "newer rules override older ones."). If the matching expectation cannot take any more calls, you will get an upper-bound-violated failure. Here's an example: + +``` +using ::testing::_; +... +EXPECT_CALL(turtle, Forward(_)); // #1 +EXPECT_CALL(turtle, Forward(10)) // #2 + .Times(2); +``` + +If `Forward(10)` is called three times in a row, the third time it will be an error, as the last matching expectation (#2) has been saturated. If, however, the third `Forward(10)` call is replaced by `Forward(20)`, then it would be OK, as now #1 will be the matching expectation. + +**Side note:** Why does Google Mock search for a match in the _reverse_ order of the expectations? The reason is that this allows a user to set up the default expectations in a mock object's constructor or the test fixture's set-up phase and then customize the mock by writing more specific expectations in the test body. So, if you have two expectations on the same method, you want to put the one with more specific matchers **after** the other, or the more specific rule would be shadowed by the more general one that comes after it. 
+ +## Ordered vs Unordered Calls ## +By default, an expectation can match a call even though an earlier expectation hasn't been satisfied. In other words, the calls don't have to occur in the order the expectations are specified. + +Sometimes, you may want all the expected calls to occur in a strict order. To say this in Google Mock is easy: + +``` +using ::testing::InSequence; +... +TEST(FooTest, DrawsLineSegment) { + ... + { + InSequence dummy; + + EXPECT_CALL(turtle, PenDown()); + EXPECT_CALL(turtle, Forward(100)); + EXPECT_CALL(turtle, PenUp()); + } + Foo(); +} +``` + +By creating an object of type `InSequence`, all expectations in its scope are put into a _sequence_ and have to occur _sequentially_. Since we are just relying on the constructor and destructor of this object to do the actual work, its name is really irrelevant. + +In this example, we test that `Foo()` calls the three expected functions in the order as written. If a call is made out-of-order, it will be an error. + +(What if you care about the relative order of some of the calls, but not all of them? Can you specify an arbitrary partial order? The answer is ... yes! If you are impatient, the details can be found in the [CookBook](CookBook.md#expecting-partially-ordered-calls).) + +## All Expectations Are Sticky (Unless Said Otherwise) ## +Now let's do a quick quiz to see how well you can use this mock stuff already. How would you test that the turtle is asked to go to the origin _exactly twice_ (you want to ignore any other instructions it receives)? + +After you've come up with your answer, take a look at ours and compare notes (solve it yourself first - don't cheat!): + +``` +using ::testing::_; +... +EXPECT_CALL(turtle, GoTo(_, _)) // #1 + .Times(AnyNumber()); +EXPECT_CALL(turtle, GoTo(0, 0)) // #2 + .Times(2); +``` + +Suppose `turtle.GoTo(0, 0)` is called three times. 
The third time, Google Mock will see that the arguments match expectation #2 (remember that we always pick the last matching expectation).
Since the order is important here, we should make it explicit using a sequence: + +``` +using ::testing::InSequence; +using ::testing::Return; +... +{ + InSequence s; + + for (int i = 1; i <= n; i++) { + EXPECT_CALL(turtle, GetX()) + .WillOnce(Return(10*i)) + .RetiresOnSaturation(); + } +} +``` + +By the way, the other situation where an expectation may _not_ be sticky is when it's in a sequence - as soon as another expectation that comes after it in the sequence has been used, it automatically retires (and will never be used to match any call). + +## Uninteresting Calls ## +A mock object may have many methods, and not all of them are that interesting. For example, in some tests we may not care about how many times `GetX()` and `GetY()` get called. + +In Google Mock, if you are not interested in a method, just don't say anything about it. If a call to this method occurs, you'll see a warning in the test output, but it won't be a failure. + +# What Now? # +Congratulations! You've learned enough about Google Mock to start using it. Now, you might want to join the [googlemock](http://groups.google.com/group/googlemock) discussion group and actually write some tests using Google Mock - it will be fun. Hey, it may even be addictive - you've been warned. + +Then, if you feel like increasing your mock quotient, you should move on to the [CookBook](CookBook.md). You can learn many advanced features of Google Mock there -- and advance your level of enjoyment and testing bliss. diff --git a/tools/external/googletest/googlemock/docs/FrequentlyAskedQuestions.md b/tools/external/googletest/googlemock/docs/FrequentlyAskedQuestions.md new file mode 100644 index 00000000..ccaa3d7a --- /dev/null +++ b/tools/external/googletest/googlemock/docs/FrequentlyAskedQuestions.md @@ -0,0 +1,628 @@ + + +Please send your questions to the +[googlemock](http://groups.google.com/group/googlemock) discussion +group. 
If you need help with compiler errors, make sure you have +tried [Google Mock Doctor](#how-am-i-supposed-to-make-sense-of-these-horrible-template-errors) first. + +## When I call a method on my mock object, the method for the real object is invoked instead. What's the problem? ## + +In order for a method to be mocked, it must be _virtual_, unless you use the [high-perf dependency injection technique](CookBook.md#mocking-nonvirtual-methods). + +## I wrote some matchers. After I upgraded to a new version of Google Mock, they no longer compile. What's going on? ## + +After version 1.4.0 of Google Mock was released, we had an idea on how +to make it easier to write matchers that can generate informative +messages efficiently. We experimented with this idea and liked what +we saw. Therefore we decided to implement it. + +Unfortunately, this means that if you have defined your own matchers +by implementing `MatcherInterface` or using `MakePolymorphicMatcher()`, +your definitions will no longer compile. Matchers defined using the +`MATCHER*` family of macros are not affected. + +Sorry for the hassle if your matchers are affected. We believe it's +in everyone's long-term interest to make this change sooner than +later. Fortunately, it's usually not hard to migrate an existing +matcher to the new API. Here's what you need to do: + +If you wrote your matcher like this: +``` +// Old matcher definition that doesn't work with the latest +// Google Mock. +using ::testing::MatcherInterface; +... +class MyWonderfulMatcher : public MatcherInterface<MyType> { + public: + ... + virtual bool Matches(MyType value) const { + // Returns true if value matches. + return value.GetFoo() > 5; + } + ... +}; +``` + +you'll need to change it to: +``` +// New matcher definition that works with the latest Google Mock. +using ::testing::MatcherInterface; +using ::testing::MatchResultListener; +... +class MyWonderfulMatcher : public MatcherInterface<MyType> { + public: + ... 
+ virtual bool MatchAndExplain(MyType value, + MatchResultListener* listener) const { + // Returns true if value matches. + return value.GetFoo() > 5; + } + ... +}; +``` +(i.e. rename `Matches()` to `MatchAndExplain()` and give it a second +argument of type `MatchResultListener*`.) + +If you were also using `ExplainMatchResultTo()` to improve the matcher +message: +``` +// Old matcher definition that doesn't work with the latest +// Google Mock. +using ::testing::MatcherInterface; +... +class MyWonderfulMatcher : public MatcherInterface<MyType> { + public: + ... + virtual bool Matches(MyType value) const { + // Returns true if value matches. + return value.GetFoo() > 5; + } + + virtual void ExplainMatchResultTo(MyType value, + ::std::ostream* os) const { + // Prints some helpful information to os to help + // a user understand why value matches (or doesn't match). + *os << "the Foo property is " << value.GetFoo(); + } + ... +}; +``` + +you should move the logic of `ExplainMatchResultTo()` into +`MatchAndExplain()`, using the `MatchResultListener` argument where +the `::std::ostream` was used: +``` +// New matcher definition that works with the latest Google Mock. +using ::testing::MatcherInterface; +using ::testing::MatchResultListener; +... +class MyWonderfulMatcher : public MatcherInterface<MyType> { + public: + ... + virtual bool MatchAndExplain(MyType value, + MatchResultListener* listener) const { + // Returns true if value matches. + *listener << "the Foo property is " << value.GetFoo(); + return value.GetFoo() > 5; + } + ... +}; +``` + +If your matcher is defined using `MakePolymorphicMatcher()`: +``` +// Old matcher definition that doesn't work with the latest +// Google Mock. +using ::testing::MakePolymorphicMatcher; +... +class MyGreatMatcher { + public: + ... + bool Matches(MyType value) const { + // Returns true if value matches. + return value.GetBar() < 42; + } + ... +}; +... MakePolymorphicMatcher(MyGreatMatcher()) ... 
+``` + +you should rename the `Matches()` method to `MatchAndExplain()` and +add a `MatchResultListener*` argument (the same as what you need to do +for matchers defined by implementing `MatcherInterface`): +``` +// New matcher definition that works with the latest Google Mock. +using ::testing::MakePolymorphicMatcher; +using ::testing::MatchResultListener; +... +class MyGreatMatcher { + public: + ... + bool MatchAndExplain(MyType value, + MatchResultListener* listener) const { + // Returns true if value matches. + return value.GetBar() < 42; + } + ... +}; +... MakePolymorphicMatcher(MyGreatMatcher()) ... +``` + +If your polymorphic matcher uses `ExplainMatchResultTo()` for better +failure messages: +``` +// Old matcher definition that doesn't work with the latest +// Google Mock. +using ::testing::MakePolymorphicMatcher; +... +class MyGreatMatcher { + public: + ... + bool Matches(MyType value) const { + // Returns true if value matches. + return value.GetBar() < 42; + } + ... +}; +void ExplainMatchResultTo(const MyGreatMatcher& matcher, + MyType value, + ::std::ostream* os) { + // Prints some helpful information to os to help + // a user understand why value matches (or doesn't match). + *os << "the Bar property is " << value.GetBar(); +} +... MakePolymorphicMatcher(MyGreatMatcher()) ... +``` + +you'll need to move the logic inside `ExplainMatchResultTo()` to +`MatchAndExplain()`: +``` +// New matcher definition that works with the latest Google Mock. +using ::testing::MakePolymorphicMatcher; +using ::testing::MatchResultListener; +... +class MyGreatMatcher { + public: + ... + bool MatchAndExplain(MyType value, + MatchResultListener* listener) const { + // Returns true if value matches. + *listener << "the Bar property is " << value.GetBar(); + return value.GetBar() < 42; + } + ... +}; +... MakePolymorphicMatcher(MyGreatMatcher()) ... 
+``` + +For more information, you can read these +[two](CookBook.md#writing-new-monomorphic-matchers) +[recipes](CookBook.md#writing-new-polymorphic-matchers) +from the cookbook. As always, you +are welcome to post questions on `googlemock@googlegroups.com` if you +need any help. + +## When using Google Mock, do I have to use Google Test as the testing framework? I have my favorite testing framework and don't want to switch. ## + +Google Mock works out of the box with Google Test. However, it's easy +to configure it to work with any testing framework of your choice. +[Here](ForDummies.md#using-google-mock-with-any-testing-framework) is how. + +## How am I supposed to make sense of these horrible template errors? ## + +If you are confused by the compiler errors gcc threw at you, +try consulting the _Google Mock Doctor_ tool first. What it does is to +scan stdin for gcc error messages, and spit out diagnoses on the +problems (we call them diseases) your code has. + +To "install", run command: +``` +alias gmd='<path to googlemock>/scripts/gmock_doctor.py' +``` + +To use it, do: +``` +<your-favorite-build-command> <your-test> 2>&1 | gmd +``` + +For example: +``` +make my_test 2>&1 | gmd +``` + +Or you can run `gmd` and copy-n-paste gcc's error messages to it. + +## Can I mock a variadic function? ## + +You cannot mock a variadic function (i.e. a function taking ellipsis +(`...`) arguments) directly in Google Mock. + +The problem is that in general, there is _no way_ for a mock object to +know how many arguments are passed to the variadic method, and what +the arguments' types are. Only the _author of the base class_ knows +the protocol, and we cannot look into their head. + +Therefore, to mock such a function, the _user_ must teach the mock +object how to figure out the number of arguments and their types. One +way to do it is to provide overloaded versions of the function. + +Ellipsis arguments are inherited from C and not really a C++ feature. 
+They are unsafe to use and don't work with arguments that have +constructors or destructors. Therefore we recommend to avoid them in +C++ as much as possible. + +## MSVC gives me warning C4301 or C4373 when I define a mock method with a const parameter. Why? ## + +If you compile this using Microsoft Visual C++ 2005 SP1: +``` +class Foo { + ... + virtual void Bar(const int i) = 0; +}; + +class MockFoo : public Foo { + ... + MOCK_METHOD1(Bar, void(const int i)); +}; +``` +You may get the following warning: +``` +warning C4301: 'MockFoo::Bar': overriding virtual function only differs from 'Foo::Bar' by const/volatile qualifier +``` + +This is a MSVC bug. The same code compiles fine with gcc, for +example. If you use Visual C++ 2008 SP1, you would get the warning: +``` +warning C4373: 'MockFoo::Bar': virtual function overrides 'Foo::Bar', previous versions of the compiler did not override when parameters only differed by const/volatile qualifiers +``` + +In C++, if you _declare_ a function with a `const` parameter, the +`const` modifier is _ignored_. Therefore, the `Foo` base class above +is equivalent to: +``` +class Foo { + ... + virtual void Bar(int i) = 0; // int or const int? Makes no difference. +}; +``` + +In fact, you can _declare_ Bar() with an `int` parameter, and _define_ +it with a `const int` parameter. The compiler will still match them +up. + +Since making a parameter `const` is meaningless in the method +_declaration_, we recommend to remove it in both `Foo` and `MockFoo`. +That should work around the VC bug. + +Note that we are talking about the _top-level_ `const` modifier here. +If the function parameter is passed by pointer or reference, declaring +the _pointee_ or _referee_ as `const` is still meaningful. For +example, the following two declarations are _not_ equivalent: +``` +void Bar(int* p); // Neither p nor *p is const. +void Bar(const int* p); // p is not const, but *p is. 
+``` + +## I have a huge mock class, and Microsoft Visual C++ runs out of memory when compiling it. What can I do? ## + +We've noticed that when the `/clr` compiler flag is used, Visual C++ +uses 5~6 times as much memory when compiling a mock class. We suggest +to avoid `/clr` when compiling native C++ mocks. + +## I can't figure out why Google Mock thinks my expectations are not satisfied. What should I do? ## + +You might want to run your test with +`--gmock_verbose=info`. This flag lets Google Mock print a trace +of every mock function call it receives. By studying the trace, +you'll gain insights on why the expectations you set are not met. + +## How can I assert that a function is NEVER called? ## + +``` +EXPECT_CALL(foo, Bar(_)) + .Times(0); +``` + +## I have a failed test where Google Mock tells me TWICE that a particular expectation is not satisfied. Isn't this redundant? ## + +When Google Mock detects a failure, it prints relevant information +(the mock function arguments, the state of relevant expectations, and +etc) to help the user debug. If another failure is detected, Google +Mock will do the same, including printing the state of relevant +expectations. + +Sometimes an expectation's state didn't change between two failures, +and you'll see the same description of the state twice. They are +however _not_ redundant, as they refer to _different points in time_. +The fact they are the same _is_ interesting information. + +## I get a heap check failure when using a mock object, but using a real object is fine. What can be wrong? ## + +Does the class (hopefully a pure interface) you are mocking have a +virtual destructor? + +Whenever you derive from a base class, make sure its destructor is +virtual. Otherwise Bad Things will happen. Consider the following +code: + +``` +class Base { + public: + // Not virtual, but should be. + ~Base() { ... } + ... +}; + +class Derived : public Base { + public: + ... + private: + std::string value_; +}; + +... 
+ Base* p = new Derived; + ... + delete p; // Surprise! ~Base() will be called, but ~Derived() will not + // - value_ is leaked. +``` + +By changing `~Base()` to virtual, `~Derived()` will be correctly +called when `delete p` is executed, and the heap checker +will be happy. + +## The "newer expectations override older ones" rule makes writing expectations awkward. Why does Google Mock do that? ## + +When people complain about this, often they are referring to code like: + +``` +// foo.Bar() should be called twice, return 1 the first time, and return +// 2 the second time. However, I have to write the expectations in the +// reverse order. This sucks big time!!! +EXPECT_CALL(foo, Bar()) + .WillOnce(Return(2)) + .RetiresOnSaturation(); +EXPECT_CALL(foo, Bar()) + .WillOnce(Return(1)) + .RetiresOnSaturation(); +``` + +The problem is that they didn't pick the **best** way to express the test's +intent. + +By default, expectations don't have to be matched in _any_ particular +order. If you want them to match in a certain order, you need to be +explicit. This is Google Mock's (and jMock's) fundamental philosophy: it's +easy to accidentally over-specify your tests, and we want to make it +harder to do so. + +There are two better ways to write the test spec. You could either +put the expectations in sequence: + +``` +// foo.Bar() should be called twice, return 1 the first time, and return +// 2 the second time. Using a sequence, we can write the expectations +// in their natural order. +{ + InSequence s; + EXPECT_CALL(foo, Bar()) + .WillOnce(Return(1)) + .RetiresOnSaturation(); + EXPECT_CALL(foo, Bar()) + .WillOnce(Return(2)) + .RetiresOnSaturation(); +} +``` + +or you can put the sequence of actions in the same expectation: + +``` +// foo.Bar() should be called twice, return 1 the first time, and return +// 2 the second time. 
+EXPECT_CALL(foo, Bar()) + .WillOnce(Return(1)) + .WillOnce(Return(2)) + .RetiresOnSaturation(); +``` + +Back to the original questions: why does Google Mock search the +expectations (and `ON_CALL`s) from back to front? Because this +allows a user to set up a mock's behavior for the common case early +(e.g. in the mock's constructor or the test fixture's set-up phase) +and customize it with more specific rules later. If Google Mock +searches from front to back, this very useful pattern won't be +possible. + +## Google Mock prints a warning when a function without EXPECT\_CALL is called, even if I have set its behavior using ON\_CALL. Would it be reasonable not to show the warning in this case? ## + +When choosing between being neat and being safe, we lean toward the +latter. So the answer is that we think it's better to show the +warning. + +Often people write `ON_CALL`s in the mock object's +constructor or `SetUp()`, as the default behavior rarely changes from +test to test. Then in the test body they set the expectations, which +are often different for each test. Having an `ON_CALL` in the set-up +part of a test doesn't mean that the calls are expected. If there's +no `EXPECT_CALL` and the method is called, it's possibly an error. If +we quietly let the call go through without notifying the user, bugs +may creep in unnoticed. + +If, however, you are sure that the calls are OK, you can write + +``` +EXPECT_CALL(foo, Bar(_)) + .WillRepeatedly(...); +``` + +instead of + +``` +ON_CALL(foo, Bar(_)) + .WillByDefault(...); +``` + +This tells Google Mock that you do expect the calls and no warning should be +printed. + +Also, you can control the verbosity using the `--gmock_verbose` flag. +If you find the output too noisy when debugging, just choose a less +verbose level. + +## How can I delete the mock function's argument in an action? 
## + +If you find yourself needing to perform some action that's not +supported by Google Mock directly, remember that you can define your own +actions using +[MakeAction()](CookBook.md#writing-new-actions) or +[MakePolymorphicAction()](CookBook.md#writing_new_polymorphic_actions), +or you can write a stub function and invoke it using +[Invoke()](CookBook.md#using-functions_methods_functors). + +## MOCK\_METHODn()'s second argument looks funny. Why don't you use the MOCK\_METHODn(Method, return\_type, arg\_1, ..., arg\_n) syntax? ## + +What?! I think it's beautiful. :-) + +While which syntax looks more natural is a subjective matter to some +extent, Google Mock's syntax was chosen for several practical advantages it +has. + +Try to mock a function that takes a map as an argument: +``` +virtual int GetSize(const map<int, std::string>& m); +``` + +Using the proposed syntax, it would be: +``` +MOCK_METHOD1(GetSize, int, const map<int, std::string>& m); +``` + +Guess what? You'll get a compiler error as the compiler thinks that +`const map<int, std::string>& m` are **two**, not one, arguments. To work +around this you can use `typedef` to give the map type a name, but +that gets in the way of your work. Google Mock's syntax avoids this +problem as the function's argument types are protected inside a pair +of parentheses: +``` +// This compiles fine. +MOCK_METHOD1(GetSize, int(const map<int, std::string>& m)); +``` + +You still need a `typedef` if the return type contains an unprotected +comma, but that's much rarer. + +Other advantages include: + 1. `MOCK_METHOD1(Foo, int, bool)` can leave a reader wondering whether the method returns `int` or `bool`, while there won't be such confusion using Google Mock's syntax. + 1. The way Google Mock describes a function type is nothing new, although many people may not be familiar with it. The same syntax was used in C, and the `function` library in `tr1` uses this syntax extensively. Since `tr1` will become a part of the new version of STL, we feel very comfortable to be consistent with it. + 1. 
The function type syntax is also used in other parts of Google Mock's API (e.g. the action interface) in order to make the implementation tractable. A user needs to learn it anyway in order to utilize Google Mock's more advanced features. We'd as well stick to the same syntax in `MOCK_METHOD*`! + +## My code calls a static/global function. Can I mock it? ## + +You can, but you need to make some changes. + +In general, if you find yourself needing to mock a static function, +it's a sign that your modules are too tightly coupled (and less +flexible, less reusable, less testable, etc). You are probably better +off defining a small interface and call the function through that +interface, which then can be easily mocked. It's a bit of work +initially, but usually pays for itself quickly. + +This Google Testing Blog +[post](http://googletesting.blogspot.com/2008/06/defeat-static-cling.html) +says it excellently. Check it out. + +## My mock object needs to do complex stuff. It's a lot of pain to specify the actions. Google Mock sucks! ## + +I know it's not a question, but you get an answer for free any way. :-) + +With Google Mock, you can create mocks in C++ easily. And people might be +tempted to use them everywhere. Sometimes they work great, and +sometimes you may find them, well, a pain to use. So, what's wrong in +the latter case? + +When you write a test without using mocks, you exercise the code and +assert that it returns the correct value or that the system is in an +expected state. This is sometimes called "state-based testing". + +Mocks are great for what some call "interaction-based" testing: +instead of checking the system state at the very end, mock objects +verify that they are invoked the right way and report an error as soon +as it arises, giving you a handle on the precise context in which the +error was triggered. This is often more effective and economical to +do than state-based testing. 
+ +If you are doing state-based testing and using a test double just to +simulate the real object, you are probably better off using a fake. +Using a mock in this case causes pain, as it's not a strong point for +mocks to perform complex actions. If you experience this and think +that mocks suck, you are just not using the right tool for your +problem. Or, you might be trying to solve the wrong problem. :-) + +## I got a warning "Uninteresting function call encountered - default action taken.." Should I panic? ## + +By all means, NO! It's just an FYI. + +What it means is that you have a mock function, you haven't set any +expectations on it (by Google Mock's rule this means that you are not +interested in calls to this function and therefore it can be called +any number of times), and it is called. That's OK - you didn't say +it's not OK to call the function! + +What if you actually meant to disallow this function to be called, but +forgot to write `EXPECT_CALL(foo, Bar()).Times(0)`? While +one can argue that it's the user's fault, Google Mock tries to be nice and +prints you a note. + +So, when you see the message and believe that there shouldn't be any +uninteresting calls, you should investigate what's going on. To make +your life easier, Google Mock prints the function name and arguments +when an uninteresting call is encountered. + +## I want to define a custom action. Should I use Invoke() or implement the action interface? ## + +Either way is fine - you want to choose the one that's more convenient +for your circumstance. + +Usually, if your action is for a particular function type, defining it +using `Invoke()` should be easier; if your action can be used in +functions of different types (e.g. if you are defining +`Return(value)`), `MakePolymorphicAction()` is +easiest. Sometimes you want precise control on what types of +functions the action can be used in, and implementing +`ActionInterface` is the way to go here. 
See the implementation of +`Return()` in `include/gmock/gmock-actions.h` for an example. + +## I'm using the set-argument-pointee action, and the compiler complains about "conflicting return type specified". What does it mean? ## + +You got this error as Google Mock has no idea what value it should return +when the mock method is called. `SetArgPointee()` says what the +side effect is, but doesn't say what the return value should be. You +need `DoAll()` to chain a `SetArgPointee()` with a `Return()`. + +See this [recipe](CookBook.md#mocking_side_effects) for more details and an example. + + +## My question is not in your FAQ! ## + +If you cannot find the answer to your question in this FAQ, there are +some other resources you can use: + + 1. read other [documentation](Documentation.md), + 1. search the mailing list [archive](http://groups.google.com/group/googlemock/topics), + 1. ask it on [googlemock@googlegroups.com](mailto:googlemock@googlegroups.com) and someone will answer it (to prevent spam, we require you to join the [discussion group](http://groups.google.com/group/googlemock) before you can post.). + +Please note that creating an issue in the +[issue tracker](https://github.com/google/googletest/issues) is _not_ +a good way to get your answer, as it is monitored infrequently by a +very small number of people. 
+ +When asking a question, it's helpful to provide as much of the +following information as possible (people cannot help you if there's +not enough information in your question): + + * the version (or the revision number if you check out from SVN directly) of Google Mock you use (Google Mock is under active development, so it's possible that your problem has been solved in a later version), + * your operating system, + * the name and version of your compiler, + * the complete command line flags you give to your compiler, + * the complete compiler error messages (if the question is about compilation), + * the _actual_ code (ideally, a minimal but complete program) that has the problem you encounter. diff --git a/tools/external/googletest/googlemock/docs/KnownIssues.md b/tools/external/googletest/googlemock/docs/KnownIssues.md new file mode 100644 index 00000000..adadf514 --- /dev/null +++ b/tools/external/googletest/googlemock/docs/KnownIssues.md @@ -0,0 +1,19 @@ +As any non-trivial software system, Google Mock has some known limitations and problems. We are working on improving it, and welcome your help! The following is a list of issues we know about. + + + +## README contains outdated information on Google Mock's compatibility with other testing frameworks ## + +The `README` file in release 1.1.0 still says that Google Mock only works with Google Test. Actually, you can configure Google Mock to work with any testing framework you choose. + +## Tests failing on machines using Power PC CPUs (e.g. some Macs) ## + +`gmock_output_test` and `gmock-printers_test` are known to fail with Power PC CPUs. This is due to portability issues with these tests, and is not an indication of problems in Google Mock itself. You can safely ignore them. 
+ +## Failed to resolve libgtest.so.0 in tests when built against installed Google Test ## + +This only applies if you manually built and installed Google Test, and then built a Google Mock against it (either explicitly, or because gtest-config was in your path post-install). In this situation, Libtool has a known issue with certain systems' ldconfig setup: + +http://article.gmane.org/gmane.comp.sysutils.automake.general/9025 + +This requires a manual run of "sudo ldconfig" after the "sudo make install" for Google Test before any binaries which link against it can be executed. This isn't a bug in our install, but we should at least have documented it or hacked a work-around into our install. We should have one of these solutions in our next release. \ No newline at end of file diff --git a/tools/external/googletest/googlemock/include/gmock/gmock-actions.h b/tools/external/googletest/googlemock/include/gmock/gmock-actions.h new file mode 100644 index 00000000..845c8232 --- /dev/null +++ b/tools/external/googletest/googlemock/include/gmock/gmock-actions.h @@ -0,0 +1,1205 @@ +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used actions. + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_ + +#ifndef _WIN32_WCE +# include +#endif + +#include +#include + +#include "gmock/internal/gmock-internal-utils.h" +#include "gmock/internal/gmock-port.h" + +#if GTEST_HAS_STD_TYPE_TRAITS_ // Defined by gtest-port.h via gmock-port.h. +#include +#endif + +namespace testing { + +// To implement an action Foo, define: +// 1. a class FooAction that implements the ActionInterface interface, and +// 2. a factory function that creates an Action object from a +// const FooAction*. +// +// The two-level delegation design follows that of Matcher, providing +// consistency for extension developers. It also eases ownership +// management as Action objects can now be copied like plain values. + +namespace internal { + +template +class ActionAdaptor; + +// BuiltInDefaultValueGetter::Get() returns a +// default-constructed T value. BuiltInDefaultValueGetter::Get() crashes with an error. 
+// +// This primary template is used when kDefaultConstructible is true. +template +struct BuiltInDefaultValueGetter { + static T Get() { return T(); } +}; +template +struct BuiltInDefaultValueGetter { + static T Get() { + Assert(false, __FILE__, __LINE__, + "Default action undefined for the function return type."); + return internal::Invalid(); + // The above statement will never be reached, but is required in + // order for this function to compile. + } +}; + +// BuiltInDefaultValue::Get() returns the "built-in" default value +// for type T, which is NULL when T is a raw pointer type, 0 when T is +// a numeric type, false when T is bool, or "" when T is string or +// std::string. In addition, in C++11 and above, it turns a +// default-constructed T value if T is default constructible. For any +// other type T, the built-in default T value is undefined, and the +// function will abort the process. +template +class BuiltInDefaultValue { + public: +#if GTEST_HAS_STD_TYPE_TRAITS_ + // This function returns true iff type T has a built-in default value. + static bool Exists() { + return ::std::is_default_constructible::value; + } + + static T Get() { + return BuiltInDefaultValueGetter< + T, ::std::is_default_constructible::value>::Get(); + } + +#else // GTEST_HAS_STD_TYPE_TRAITS_ + // This function returns true iff type T has a built-in default value. + static bool Exists() { + return false; + } + + static T Get() { + return BuiltInDefaultValueGetter::Get(); + } + +#endif // GTEST_HAS_STD_TYPE_TRAITS_ +}; + +// This partial specialization says that we use the same built-in +// default value for T and const T. +template +class BuiltInDefaultValue { + public: + static bool Exists() { return BuiltInDefaultValue::Exists(); } + static T Get() { return BuiltInDefaultValue::Get(); } +}; + +// This partial specialization defines the default values for pointer +// types. 
+template <typename T> +class BuiltInDefaultValue<T*> { + public: + static bool Exists() { return true; } + static T* Get() { return NULL; } +}; + +// The following specializations define the default values for +// specific types we care about. Each expansion is a full specialization for `type` whose Exists() is true and whose Get() returns `value`. +#define GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(type, value) \ + template <> \ + class BuiltInDefaultValue<type> { \ + public: \ + static bool Exists() { return true; } \ + static type Get() { return value; } \ + } + +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(void, ); // NOLINT +#if GTEST_HAS_GLOBAL_STRING +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(::string, ""); +#endif // GTEST_HAS_GLOBAL_STRING +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(::std::string, ""); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(bool, false); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned char, '\0'); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed char, '\0'); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(char, '\0'); + +// There's no need for a default action for signed wchar_t, as that +// type is the same as wchar_t for gcc, and invalid for MSVC. +// +// There's also no need for a default action for unsigned wchar_t, as +// that type is the same as unsigned int for gcc, and invalid for +// MSVC. 
+#if GMOCK_WCHAR_T_IS_NATIVE_ +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(wchar_t, 0U); // NOLINT +#endif + +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned short, 0U); // NOLINT +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed short, 0); // NOLINT +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned int, 0U); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed int, 0); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned long, 0UL); // NOLINT +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed long, 0L); // NOLINT +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(UInt64, 0); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(Int64, 0); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(float, 0); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(double, 0); + +#undef GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_ + +} // namespace internal + +// When an unexpected function call is encountered, Google Mock will +// let it return a default value if the user has specified one for its +// return type, or if the return type has a built-in default value; +// otherwise Google Mock won't know what value to return and will have +// to abort the process. +// +// The DefaultValue<T> class allows a user to specify the +// default value for a type T that is both copyable and publicly +// destructible (i.e. anything that can be used as a function return +// type). The usage is: +// +// // Sets the default value for type T to be foo. +// DefaultValue<T>::Set(foo); +template <typename T> +class DefaultValue { + public: + // Sets the default value for type T; requires T to be + // copy-constructible and have a public destructor. + static void Set(T x) { + delete producer_; + producer_ = new FixedValueProducer(x); + } + + // Provides a factory function to be called to generate the default value. + // This method can be used even if T is only move-constructible, but it is not + // limited to that case. 
+ typedef T (*FactoryFunction)(); + static void SetFactory(FactoryFunction factory) { + delete producer_; + producer_ = new FactoryValueProducer(factory); + } + + // Unsets the default value for type T. + static void Clear() { + delete producer_; + producer_ = NULL; + } + + // Returns true iff the user has set the default value for type T. + static bool IsSet() { return producer_ != NULL; } + + // Returns true if T has a default return value set by the user or there + // exists a built-in default value. + static bool Exists() { + return IsSet() || internal::BuiltInDefaultValue<T>::Exists(); + } + + // Returns the default value for type T if the user has set one; + // otherwise returns the built-in default value. Requires that Exists() + // is true, which ensures that the return value is well-defined. + static T Get() { + return producer_ == NULL ? + internal::BuiltInDefaultValue<T>::Get() : producer_->Produce(); + } + + private: + class ValueProducer { + public: + virtual ~ValueProducer() {} + virtual T Produce() = 0; + }; + + class FixedValueProducer : public ValueProducer { + public: + explicit FixedValueProducer(T value) : value_(value) {} + virtual T Produce() { return value_; } + + private: + const T value_; + GTEST_DISALLOW_COPY_AND_ASSIGN_(FixedValueProducer); + }; + + class FactoryValueProducer : public ValueProducer { + public: + explicit FactoryValueProducer(FactoryFunction factory) + : factory_(factory) {} + virtual T Produce() { return factory_(); } + + private: + const FactoryFunction factory_; + GTEST_DISALLOW_COPY_AND_ASSIGN_(FactoryValueProducer); + }; + + static ValueProducer* producer_; +}; + +// This partial specialization allows a user to set default values for +// reference types. +template <typename T> +class DefaultValue<T&> { + public: + // Sets the default value for type T&. + static void Set(T& x) { // NOLINT + address_ = &x; + } + + // Unsets the default value for type T&. 
+ static void Clear() { + address_ = NULL; + } + + // Returns true iff the user has set the default value for type T&. + static bool IsSet() { return address_ != NULL; } + + // Returns true if T has a default return value set by the user or there + // exists a built-in default value. + static bool Exists() { + return IsSet() || internal::BuiltInDefaultValue<T&>::Exists(); + } + + // Returns the default value for type T& if the user has set one; + // otherwise returns the built-in default value if there is one; + // otherwise aborts the process. + static T& Get() { + return address_ == NULL ? + internal::BuiltInDefaultValue<T&>::Get() : *address_; + } + + private: + static T* address_; +}; + +// This specialization allows DefaultValue<void>::Get() to +// compile. +template <> +class DefaultValue<void> { + public: + static bool Exists() { return true; } + static void Get() {} +}; + +// Points to the user-set default value for type T. +template <typename T> +typename DefaultValue<T>::ValueProducer* DefaultValue<T>::producer_ = NULL; + +// Points to the user-set default value for type T&. +template <typename T> +T* DefaultValue<T&>::address_ = NULL; + +// Implement this interface to define an action for function type F. +template <typename F> +class ActionInterface { + public: + typedef typename internal::Function<F>::Result Result; + typedef typename internal::Function<F>::ArgumentTuple ArgumentTuple; + + ActionInterface() {} + virtual ~ActionInterface() {} + + // Performs the action. This method is not const, as in general an + // action can have side effects and be stateful. For example, a + // get-the-next-element-from-the-collection action will need to + // remember the current element. + virtual Result Perform(const ArgumentTuple& args) = 0; + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(ActionInterface); +}; + +// An Action is a copyable and IMMUTABLE (except by assignment) +// object that represents an action to be taken when a mock function +// of type F is called. 
The implementation of Action is just a +// linked_ptr to const ActionInterface, so copying is fairly cheap. +// Don't inherit from Action! +// +// You can view an object implementing ActionInterface as a +// concrete action (including its current state), and an Action +// object as a handle to it. +template +class Action { + public: + typedef typename internal::Function::Result Result; + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + // Constructs a null Action. Needed for storing Action objects in + // STL containers. + Action() : impl_(NULL) {} + + // Constructs an Action from its implementation. A NULL impl is + // used to represent the "do-default" action. + explicit Action(ActionInterface* impl) : impl_(impl) {} + + // Copy constructor. + Action(const Action& action) : impl_(action.impl_) {} + + // This constructor allows us to turn an Action object into an + // Action, as long as F's arguments can be implicitly converted + // to Func's and Func's return type can be implicitly converted to + // F's. + template + explicit Action(const Action& action); + + // Returns true iff this is the DoDefault() action. + bool IsDoDefault() const { return impl_.get() == NULL; } + + // Performs the action. Note that this method is const even though + // the corresponding method in ActionInterface is not. The reason + // is that a const Action means that it cannot be re-bound to + // another concrete action, not that the concrete action it binds to + // cannot change state. (Think of the difference between a const + // pointer and a pointer to const.) + Result Perform(const ArgumentTuple& args) const { + internal::Assert( + !IsDoDefault(), __FILE__, __LINE__, + "You are using DoDefault() inside a composite action like " + "DoAll() or WithArgs(). This is not supported for technical " + "reasons. 
Please instead spell out the default action, or " + "assign the default action to an Action variable and use " + "the variable in various places."); + return impl_->Perform(args); + } + + private: + template + friend class internal::ActionAdaptor; + + internal::linked_ptr > impl_; +}; + +// The PolymorphicAction class template makes it easy to implement a +// polymorphic action (i.e. an action that can be used in mock +// functions of more than one type, e.g. Return()). +// +// To define a polymorphic action, a user first provides a COPYABLE +// implementation class that has a Perform() method template: +// +// class FooAction { +// public: +// template +// Result Perform(const ArgumentTuple& args) const { +// // Processes the arguments and returns a result, using +// // tr1::get<N>(args) to get the N-th (0-based) argument in the tuple. +// } +// ... +// }; +// +// Then the user creates the polymorphic action using +// MakePolymorphicAction(object) where object has type FooAction. See +// the definition of Return(void) and SetArgumentPointee(value) for +// complete examples. +template +class PolymorphicAction { + public: + explicit PolymorphicAction(const Impl& impl) : impl_(impl) {} + + template + operator Action() const { + return Action(new MonomorphicImpl(impl_)); + } + + private: + template + class MonomorphicImpl : public ActionInterface { + public: + typedef typename internal::Function::Result Result; + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + explicit MonomorphicImpl(const Impl& impl) : impl_(impl) {} + + virtual Result Perform(const ArgumentTuple& args) { + return impl_.template Perform(args); + } + + private: + Impl impl_; + + GTEST_DISALLOW_ASSIGN_(MonomorphicImpl); + }; + + Impl impl_; + + GTEST_DISALLOW_ASSIGN_(PolymorphicAction); +}; + +// Creates an Action from its implementation and returns it. The +// created Action object owns the implementation. 
+template +Action MakeAction(ActionInterface* impl) { + return Action(impl); +} + +// Creates a polymorphic action from its implementation. This is +// easier to use than the PolymorphicAction constructor as it +// doesn't require you to explicitly write the template argument, e.g. +// +// MakePolymorphicAction(foo); +// vs +// PolymorphicAction(foo); +template +inline PolymorphicAction MakePolymorphicAction(const Impl& impl) { + return PolymorphicAction(impl); +} + +namespace internal { + +// Allows an Action object to pose as an Action, as long as F2 +// and F1 are compatible. +template +class ActionAdaptor : public ActionInterface { + public: + typedef typename internal::Function::Result Result; + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + explicit ActionAdaptor(const Action& from) : impl_(from.impl_) {} + + virtual Result Perform(const ArgumentTuple& args) { + return impl_->Perform(args); + } + + private: + const internal::linked_ptr > impl_; + + GTEST_DISALLOW_ASSIGN_(ActionAdaptor); +}; + +// Helper struct to specialize ReturnAction to execute a move instead of a copy +// on return. Useful for move-only types, but could be used on any type. +template +struct ByMoveWrapper { + explicit ByMoveWrapper(T value) : payload(internal::move(value)) {} + T payload; +}; + +// Implements the polymorphic Return(x) action, which can be used in +// any function that returns the type of x, regardless of the argument +// types. +// +// Note: The value passed into Return must be converted into +// Function::Result when this action is cast to Action rather than +// when that action is performed. This is important in scenarios like +// +// MOCK_METHOD1(Method, T(U)); +// ... +// { +// Foo foo; +// X x(&foo); +// EXPECT_CALL(mock, Method(_)).WillOnce(Return(x)); +// } +// +// In the example above the variable x holds reference to foo which leaves +// scope and gets destroyed. 
If copying X just copies a reference to foo, +// that copy will be left with a hanging reference. If conversion to T +// makes a copy of foo, the above code is safe. To support that scenario, we +// need to make sure that the type conversion happens inside the EXPECT_CALL +// statement, and conversion of the result of Return to Action is a +// good place for that. +// +template +class ReturnAction { + public: + // Constructs a ReturnAction object from the value to be returned. + // 'value' is passed by value instead of by const reference in order + // to allow Return("string literal") to compile. + explicit ReturnAction(R value) : value_(new R(internal::move(value))) {} + + // This template type conversion operator allows Return(x) to be + // used in ANY function that returns x's type. + template + operator Action() const { + // Assert statement belongs here because this is the best place to verify + // conditions on F. It produces the clearest error messages + // in most compilers. + // Impl really belongs in this scope as a local class but can't + // because MSVC produces duplicate symbols in different translation units + // in this case. Until MS fixes that bug we put Impl into the class scope + // and put the typedef both here (for use in assert statement) and + // in the Impl class. But both definitions must be the same. + typedef typename Function::Result Result; + GTEST_COMPILE_ASSERT_( + !is_reference::value, + use_ReturnRef_instead_of_Return_to_return_a_reference); + return Action(new Impl(value_)); + } + + private: + // Implements the Return(x) action for a particular function type F. + template + class Impl : public ActionInterface { + public: + typedef typename Function::Result Result; + typedef typename Function::ArgumentTuple ArgumentTuple; + + // The implicit cast is necessary when Result has more than one + // single-argument constructor (e.g. Result is std::vector) and R + // has a type conversion operator template. 
In that case, value_(value) + // won't compile as the compiler doesn't know which constructor of + // Result to call. ImplicitCast_ forces the compiler to convert R to + // Result without considering explicit constructors, thus resolving the + // ambiguity. value_ is then initialized using its copy constructor. + explicit Impl(const linked_ptr& value) + : value_before_cast_(*value), + value_(ImplicitCast_(value_before_cast_)) {} + + virtual Result Perform(const ArgumentTuple&) { return value_; } + + private: + GTEST_COMPILE_ASSERT_(!is_reference::value, + Result_cannot_be_a_reference_type); + // We save the value before casting just in case it is being cast to a + // wrapper type. + R value_before_cast_; + Result value_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(Impl); + }; + + // Partially specialize for ByMoveWrapper. This version of ReturnAction will + // move its contents instead. + template + class Impl, F> : public ActionInterface { + public: + typedef typename Function::Result Result; + typedef typename Function::ArgumentTuple ArgumentTuple; + + explicit Impl(const linked_ptr& wrapper) + : performed_(false), wrapper_(wrapper) {} + + virtual Result Perform(const ArgumentTuple&) { + GTEST_CHECK_(!performed_) + << "A ByMove() action should only be performed once."; + performed_ = true; + return internal::move(wrapper_->payload); + } + + private: + bool performed_; + const linked_ptr wrapper_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; + + const linked_ptr value_; + + GTEST_DISALLOW_ASSIGN_(ReturnAction); +}; + +// Implements the ReturnNull() action. +class ReturnNullAction { + public: + // Allows ReturnNull() to be used in any pointer-returning function. In C++11 + // this is enforced by returning nullptr, and in non-C++11 by asserting a + // pointer type at compile time. 
+ template + static Result Perform(const ArgumentTuple&) { +#if GTEST_LANG_CXX11 + return nullptr; +#else + GTEST_COMPILE_ASSERT_(internal::is_pointer::value, + ReturnNull_can_be_used_to_return_a_pointer_only); + return NULL; +#endif // GTEST_LANG_CXX11 + } +}; + +// Implements the Return() action. +class ReturnVoidAction { + public: + // Allows Return() to be used in any void-returning function. + template + static void Perform(const ArgumentTuple&) { + CompileAssertTypesEqual(); + } +}; + +// Implements the polymorphic ReturnRef(x) action, which can be used +// in any function that returns a reference to the type of x, +// regardless of the argument types. +template +class ReturnRefAction { + public: + // Constructs a ReturnRefAction object from the reference to be returned. + explicit ReturnRefAction(T& ref) : ref_(ref) {} // NOLINT + + // This template type conversion operator allows ReturnRef(x) to be + // used in ANY function that returns a reference to x's type. + template + operator Action() const { + typedef typename Function::Result Result; + // Asserts that the function return type is a reference. This + // catches the user error of using ReturnRef(x) when Return(x) + // should be used, and generates some helpful error message. + GTEST_COMPILE_ASSERT_(internal::is_reference::value, + use_Return_instead_of_ReturnRef_to_return_a_value); + return Action(new Impl(ref_)); + } + + private: + // Implements the ReturnRef(x) action for a particular function type F. 
+ template + class Impl : public ActionInterface { + public: + typedef typename Function::Result Result; + typedef typename Function::ArgumentTuple ArgumentTuple; + + explicit Impl(T& ref) : ref_(ref) {} // NOLINT + + virtual Result Perform(const ArgumentTuple&) { + return ref_; + } + + private: + T& ref_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; + + T& ref_; + + GTEST_DISALLOW_ASSIGN_(ReturnRefAction); +}; + +// Implements the polymorphic ReturnRefOfCopy(x) action, which can be +// used in any function that returns a reference to the type of x, +// regardless of the argument types. +template +class ReturnRefOfCopyAction { + public: + // Constructs a ReturnRefOfCopyAction object from the reference to + // be returned. + explicit ReturnRefOfCopyAction(const T& value) : value_(value) {} // NOLINT + + // This template type conversion operator allows ReturnRefOfCopy(x) to be + // used in ANY function that returns a reference to x's type. + template + operator Action() const { + typedef typename Function::Result Result; + // Asserts that the function return type is a reference. This + // catches the user error of using ReturnRefOfCopy(x) when Return(x) + // should be used, and generates some helpful error message. + GTEST_COMPILE_ASSERT_( + internal::is_reference::value, + use_Return_instead_of_ReturnRefOfCopy_to_return_a_value); + return Action(new Impl(value_)); + } + + private: + // Implements the ReturnRefOfCopy(x) action for a particular function type F. + template + class Impl : public ActionInterface { + public: + typedef typename Function::Result Result; + typedef typename Function::ArgumentTuple ArgumentTuple; + + explicit Impl(const T& value) : value_(value) {} // NOLINT + + virtual Result Perform(const ArgumentTuple&) { + return value_; + } + + private: + T value_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; + + const T value_; + + GTEST_DISALLOW_ASSIGN_(ReturnRefOfCopyAction); +}; + +// Implements the polymorphic DoDefault() action. 
+class DoDefaultAction { + public: + // This template type conversion operator allows DoDefault() to be + // used in any function. + template + operator Action() const { return Action(NULL); } +}; + +// Implements the Assign action to set a given pointer referent to a +// particular value. +template +class AssignAction { + public: + AssignAction(T1* ptr, T2 value) : ptr_(ptr), value_(value) {} + + template + void Perform(const ArgumentTuple& /* args */) const { + *ptr_ = value_; + } + + private: + T1* const ptr_; + const T2 value_; + + GTEST_DISALLOW_ASSIGN_(AssignAction); +}; + +#if !GTEST_OS_WINDOWS_MOBILE + +// Implements the SetErrnoAndReturn action to simulate return from +// various system calls and libc functions. +template +class SetErrnoAndReturnAction { + public: + SetErrnoAndReturnAction(int errno_value, T result) + : errno_(errno_value), + result_(result) {} + template + Result Perform(const ArgumentTuple& /* args */) const { + errno = errno_; + return result_; + } + + private: + const int errno_; + const T result_; + + GTEST_DISALLOW_ASSIGN_(SetErrnoAndReturnAction); +}; + +#endif // !GTEST_OS_WINDOWS_MOBILE + +// Implements the SetArgumentPointee(x) action for any function +// whose N-th argument (0-based) is a pointer to x's type. The +// template parameter kIsProto is true iff type A is ProtocolMessage, +// proto2::Message, or a sub-class of those. +template +class SetArgumentPointeeAction { + public: + // Constructs an action that sets the variable pointed to by the + // N-th function argument to 'value'. 
+ explicit SetArgumentPointeeAction(const A& value) : value_(value) {} + + template + void Perform(const ArgumentTuple& args) const { + CompileAssertTypesEqual(); + *::testing::get(args) = value_; + } + + private: + const A value_; + + GTEST_DISALLOW_ASSIGN_(SetArgumentPointeeAction); +}; + +template +class SetArgumentPointeeAction { + public: + // Constructs an action that sets the variable pointed to by the + // N-th function argument to 'proto'. Both ProtocolMessage and + // proto2::Message have the CopyFrom() method, so the same + // implementation works for both. + explicit SetArgumentPointeeAction(const Proto& proto) : proto_(new Proto) { + proto_->CopyFrom(proto); + } + + template + void Perform(const ArgumentTuple& args) const { + CompileAssertTypesEqual(); + ::testing::get(args)->CopyFrom(*proto_); + } + + private: + const internal::linked_ptr proto_; + + GTEST_DISALLOW_ASSIGN_(SetArgumentPointeeAction); +}; + +// Implements the InvokeWithoutArgs(f) action. The template argument +// FunctionImpl is the implementation type of f, which can be either a +// function pointer or a functor. InvokeWithoutArgs(f) can be used as an +// Action as long as f's type is compatible with F (i.e. f can be +// assigned to a tr1::function). +template +class InvokeWithoutArgsAction { + public: + // The c'tor makes a copy of function_impl (either a function + // pointer or a functor). + explicit InvokeWithoutArgsAction(FunctionImpl function_impl) + : function_impl_(function_impl) {} + + // Allows InvokeWithoutArgs(f) to be used as any action whose type is + // compatible with f. + template + Result Perform(const ArgumentTuple&) { return function_impl_(); } + + private: + FunctionImpl function_impl_; + + GTEST_DISALLOW_ASSIGN_(InvokeWithoutArgsAction); +}; + +// Implements the InvokeWithoutArgs(object_ptr, &Class::Method) action. 
+template +class InvokeMethodWithoutArgsAction { + public: + InvokeMethodWithoutArgsAction(Class* obj_ptr, MethodPtr method_ptr) + : obj_ptr_(obj_ptr), method_ptr_(method_ptr) {} + + template + Result Perform(const ArgumentTuple&) const { + return (obj_ptr_->*method_ptr_)(); + } + + private: + Class* const obj_ptr_; + const MethodPtr method_ptr_; + + GTEST_DISALLOW_ASSIGN_(InvokeMethodWithoutArgsAction); +}; + +// Implements the IgnoreResult(action) action. +template +class IgnoreResultAction { + public: + explicit IgnoreResultAction(const A& action) : action_(action) {} + + template + operator Action() const { + // Assert statement belongs here because this is the best place to verify + // conditions on F. It produces the clearest error messages + // in most compilers. + // Impl really belongs in this scope as a local class but can't + // because MSVC produces duplicate symbols in different translation units + // in this case. Until MS fixes that bug we put Impl into the class scope + // and put the typedef both here (for use in assert statement) and + // in the Impl class. But both definitions must be the same. + typedef typename internal::Function::Result Result; + + // Asserts at compile time that F returns void. + CompileAssertTypesEqual(); + + return Action(new Impl(action_)); + } + + private: + template + class Impl : public ActionInterface { + public: + typedef typename internal::Function::Result Result; + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + explicit Impl(const A& action) : action_(action) {} + + virtual void Perform(const ArgumentTuple& args) { + // Performs the action and ignores its result. + action_.Perform(args); + } + + private: + // Type OriginalFunction is the same as F except that its return + // type is IgnoredValue. 
+ typedef typename internal::Function::MakeResultIgnoredValue + OriginalFunction; + + const Action action_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; + + const A action_; + + GTEST_DISALLOW_ASSIGN_(IgnoreResultAction); +}; + +// A ReferenceWrapper object represents a reference to type T, +// which can be either const or not. It can be explicitly converted +// from, and implicitly converted to, a T&. Unlike a reference, +// ReferenceWrapper can be copied and can survive template type +// inference. This is used to support by-reference arguments in the +// InvokeArgument(...) action. The idea was from "reference +// wrappers" in tr1, which we don't have in our source tree yet. +template +class ReferenceWrapper { + public: + // Constructs a ReferenceWrapper object from a T&. + explicit ReferenceWrapper(T& l_value) : pointer_(&l_value) {} // NOLINT + + // Allows a ReferenceWrapper object to be implicitly converted to + // a T&. + operator T&() const { return *pointer_; } + private: + T* pointer_; +}; + +// Allows the expression ByRef(x) to be printed as a reference to x. +template +void PrintTo(const ReferenceWrapper& ref, ::std::ostream* os) { + T& value = ref; + UniversalPrinter::Print(value, os); +} + +// Does two actions sequentially. Used for implementing the DoAll(a1, +// a2, ...) action. +template +class DoBothAction { + public: + DoBothAction(Action1 action1, Action2 action2) + : action1_(action1), action2_(action2) {} + + // This template type conversion operator allows DoAll(a1, ..., a_n) + // to be used in ANY function of compatible type. + template + operator Action() const { + return Action(new Impl(action1_, action2_)); + } + + private: + // Implements the DoAll(...) action for a particular function type F. 
+ template + class Impl : public ActionInterface { + public: + typedef typename Function::Result Result; + typedef typename Function::ArgumentTuple ArgumentTuple; + typedef typename Function::MakeResultVoid VoidResult; + + Impl(const Action& action1, const Action& action2) + : action1_(action1), action2_(action2) {} + + virtual Result Perform(const ArgumentTuple& args) { + action1_.Perform(args); + return action2_.Perform(args); + } + + private: + const Action action1_; + const Action action2_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; + + Action1 action1_; + Action2 action2_; + + GTEST_DISALLOW_ASSIGN_(DoBothAction); +}; + +} // namespace internal + +// An Unused object can be implicitly constructed from ANY value. +// This is handy when defining actions that ignore some or all of the +// mock function arguments. For example, given +// +// MOCK_METHOD3(Foo, double(const string& label, double x, double y)); +// MOCK_METHOD3(Bar, double(int index, double x, double y)); +// +// instead of +// +// double DistanceToOriginWithLabel(const string& label, double x, double y) { +// return sqrt(x*x + y*y); +// } +// double DistanceToOriginWithIndex(int index, double x, double y) { +// return sqrt(x*x + y*y); +// } +// ... +// EXPECT_CALL(mock, Foo("abc", _, _)) +// .WillOnce(Invoke(DistanceToOriginWithLabel)); +// EXPECT_CALL(mock, Bar(5, _, _)) +// .WillOnce(Invoke(DistanceToOriginWithIndex)); +// +// you could write +// +// // We can declare any uninteresting argument as Unused. +// double DistanceToOrigin(Unused, double x, double y) { +// return sqrt(x*x + y*y); +// } +// ... +// EXPECT_CALL(mock, Foo("abc", _, _)).WillOnce(Invoke(DistanceToOrigin)); +// EXPECT_CALL(mock, Bar(5, _, _)).WillOnce(Invoke(DistanceToOrigin)); +typedef internal::IgnoredValue Unused; + +// This constructor allows us to turn an Action<From> object into an +// Action<To>, as long as To's arguments can be implicitly converted +// to From's and From's return type can be implicitly converted to +// To's. 
+template +template +Action::Action(const Action& from) + : impl_(new internal::ActionAdaptor(from)) {} + +// Creates an action that returns 'value'. 'value' is passed by value +// instead of const reference - otherwise Return("string literal") +// will trigger a compiler error about using array as initializer. +template +internal::ReturnAction Return(R value) { + return internal::ReturnAction(internal::move(value)); +} + +// Creates an action that returns NULL. +inline PolymorphicAction ReturnNull() { + return MakePolymorphicAction(internal::ReturnNullAction()); +} + +// Creates an action that returns from a void function. +inline PolymorphicAction Return() { + return MakePolymorphicAction(internal::ReturnVoidAction()); +} + +// Creates an action that returns the reference to a variable. +template +inline internal::ReturnRefAction ReturnRef(R& x) { // NOLINT + return internal::ReturnRefAction(x); +} + +// Creates an action that returns the reference to a copy of the +// argument. The copy is created when the action is constructed and +// lives as long as the action. +template +inline internal::ReturnRefOfCopyAction ReturnRefOfCopy(const R& x) { + return internal::ReturnRefOfCopyAction(x); +} + +// Modifies the parent action (a Return() action) to perform a move of the +// argument instead of a copy. +// Return(ByMove()) actions can only be executed once and will assert this +// invariant. +template +internal::ByMoveWrapper ByMove(R x) { + return internal::ByMoveWrapper(internal::move(x)); +} + +// Creates an action that does the default action for the given mock function. +inline internal::DoDefaultAction DoDefault() { + return internal::DoDefaultAction(); +} + +// Creates an action that sets the variable pointed to by the N-th +// (0-based) function argument to 'value'. 
+template +PolymorphicAction< + internal::SetArgumentPointeeAction< + N, T, internal::IsAProtocolMessage::value> > +SetArgPointee(const T& x) { + return MakePolymorphicAction(internal::SetArgumentPointeeAction< + N, T, internal::IsAProtocolMessage::value>(x)); +} + +#if !((GTEST_GCC_VER_ && GTEST_GCC_VER_ < 40000) || GTEST_OS_SYMBIAN) +// This overload allows SetArgPointee() to accept a string literal. +// GCC prior to the version 4.0 and Symbian C++ compiler cannot distinguish +// this overload from the templated version and emit a compile error. +template +PolymorphicAction< + internal::SetArgumentPointeeAction > +SetArgPointee(const char* p) { + return MakePolymorphicAction(internal::SetArgumentPointeeAction< + N, const char*, false>(p)); +} + +template +PolymorphicAction< + internal::SetArgumentPointeeAction > +SetArgPointee(const wchar_t* p) { + return MakePolymorphicAction(internal::SetArgumentPointeeAction< + N, const wchar_t*, false>(p)); +} +#endif + +// The following version is DEPRECATED. +template +PolymorphicAction< + internal::SetArgumentPointeeAction< + N, T, internal::IsAProtocolMessage::value> > +SetArgumentPointee(const T& x) { + return MakePolymorphicAction(internal::SetArgumentPointeeAction< + N, T, internal::IsAProtocolMessage::value>(x)); +} + +// Creates an action that sets a pointer referent to a given value. +template +PolymorphicAction > Assign(T1* ptr, T2 val) { + return MakePolymorphicAction(internal::AssignAction(ptr, val)); +} + +#if !GTEST_OS_WINDOWS_MOBILE + +// Creates an action that sets errno and returns the appropriate error. +template +PolymorphicAction > +SetErrnoAndReturn(int errval, T result) { + return MakePolymorphicAction( + internal::SetErrnoAndReturnAction(errval, result)); +} + +#endif // !GTEST_OS_WINDOWS_MOBILE + +// Various overloads for InvokeWithoutArgs(). + +// Creates an action that invokes 'function_impl' with no argument. 
+template +PolymorphicAction > +InvokeWithoutArgs(FunctionImpl function_impl) { + return MakePolymorphicAction( + internal::InvokeWithoutArgsAction(function_impl)); +} + +// Creates an action that invokes the given method on the given object +// with no argument. +template +PolymorphicAction > +InvokeWithoutArgs(Class* obj_ptr, MethodPtr method_ptr) { + return MakePolymorphicAction( + internal::InvokeMethodWithoutArgsAction( + obj_ptr, method_ptr)); +} + +// Creates an action that performs an_action and throws away its +// result. In other words, it changes the return type of an_action to +// void. an_action MUST NOT return void, or the code won't compile. +template +inline internal::IgnoreResultAction IgnoreResult(const A& an_action) { + return internal::IgnoreResultAction(an_action); +} + +// Creates a reference wrapper for the given L-value. If necessary, +// you can explicitly specify the type of the reference. For example, +// suppose 'derived' is an object of type Derived, ByRef(derived) +// would wrap a Derived&. If you want to wrap a const Base& instead, +// where Base is a base class of Derived, just write: +// +// ByRef<const Base>(derived) +template +inline internal::ReferenceWrapper ByRef(T& l_value) { // NOLINT + return internal::ReferenceWrapper(l_value); +} + +} // namespace testing + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_ diff --git a/tools/external/googletest/googlemock/include/gmock/gmock-cardinalities.h b/tools/external/googletest/googlemock/include/gmock/gmock-cardinalities.h new file mode 100644 index 00000000..fc315f92 --- /dev/null +++ b/tools/external/googletest/googlemock/include/gmock/gmock-cardinalities.h @@ -0,0 +1,147 @@ +// Copyright 2007, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used cardinalities. More +// cardinalities can be defined by the user implementing the +// CardinalityInterface interface if necessary. 
+ +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ + +#include +#include // NOLINT +#include "gmock/internal/gmock-port.h" +#include "gtest/gtest.h" + +namespace testing { + +// To implement a cardinality Foo, define: +// 1. a class FooCardinality that implements the +// CardinalityInterface interface, and +// 2. a factory function that creates a Cardinality object from a +// const FooCardinality*. +// +// The two-level delegation design follows that of Matcher, providing +// consistency for extension developers. It also eases ownership +// management as Cardinality objects can now be copied like plain values. + +// The implementation of a cardinality. +class CardinalityInterface { + public: + virtual ~CardinalityInterface() {} + + // Conservative estimate on the lower/upper bound of the number of + // calls allowed. + virtual int ConservativeLowerBound() const { return 0; } + virtual int ConservativeUpperBound() const { return INT_MAX; } + + // Returns true iff call_count calls will satisfy this cardinality. + virtual bool IsSatisfiedByCallCount(int call_count) const = 0; + + // Returns true iff call_count calls will saturate this cardinality. + virtual bool IsSaturatedByCallCount(int call_count) const = 0; + + // Describes self to an ostream. + virtual void DescribeTo(::std::ostream* os) const = 0; +}; + +// A Cardinality is a copyable and IMMUTABLE (except by assignment) +// object that specifies how many times a mock function is expected to +// be called. The implementation of Cardinality is just a linked_ptr +// to const CardinalityInterface, so copying is fairly cheap. +// Don't inherit from Cardinality! +class GTEST_API_ Cardinality { + public: + // Constructs a null cardinality. Needed for storing Cardinality + // objects in STL containers. + Cardinality() {} + + // Constructs a Cardinality from its implementation. 
+ explicit Cardinality(const CardinalityInterface* impl) : impl_(impl) {} + + // Conservative estimate on the lower/upper bound of the number of + // calls allowed. + int ConservativeLowerBound() const { return impl_->ConservativeLowerBound(); } + int ConservativeUpperBound() const { return impl_->ConservativeUpperBound(); } + + // Returns true iff call_count calls will satisfy this cardinality. + bool IsSatisfiedByCallCount(int call_count) const { + return impl_->IsSatisfiedByCallCount(call_count); + } + + // Returns true iff call_count calls will saturate this cardinality. + bool IsSaturatedByCallCount(int call_count) const { + return impl_->IsSaturatedByCallCount(call_count); + } + + // Returns true iff call_count calls will over-saturate this + // cardinality, i.e. exceed the maximum number of allowed calls. + bool IsOverSaturatedByCallCount(int call_count) const { + return impl_->IsSaturatedByCallCount(call_count) && + !impl_->IsSatisfiedByCallCount(call_count); + } + + // Describes self to an ostream + void DescribeTo(::std::ostream* os) const { impl_->DescribeTo(os); } + + // Describes the given actual call count to an ostream. + static void DescribeActualCallCountTo(int actual_call_count, + ::std::ostream* os); + + private: + internal::linked_ptr impl_; +}; + +// Creates a cardinality that allows at least n calls. +GTEST_API_ Cardinality AtLeast(int n); + +// Creates a cardinality that allows at most n calls. +GTEST_API_ Cardinality AtMost(int n); + +// Creates a cardinality that allows any number of calls. +GTEST_API_ Cardinality AnyNumber(); + +// Creates a cardinality that allows between min and max calls. +GTEST_API_ Cardinality Between(int min, int max); + +// Creates a cardinality that allows exactly n calls. +GTEST_API_ Cardinality Exactly(int n); + +// Creates a cardinality from its implementation. 
+inline Cardinality MakeCardinality(const CardinalityInterface* c) { + return Cardinality(c); +} + +} // namespace testing + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ diff --git a/tools/external/googletest/googlemock/include/gmock/gmock-generated-actions.h b/tools/external/googletest/googlemock/include/gmock/gmock-generated-actions.h new file mode 100644 index 00000000..be4ebe4f --- /dev/null +++ b/tools/external/googletest/googlemock/include/gmock/gmock-generated-actions.h @@ -0,0 +1,2377 @@ +// This file was GENERATED by a script. DO NOT EDIT BY HAND!!! + +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used variadic actions. + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ + +#include "gmock/gmock-actions.h" +#include "gmock/internal/gmock-port.h" + +namespace testing { +namespace internal { + +// InvokeHelper knows how to unpack an N-tuple and invoke an N-ary +// function or method with the unpacked values, where F is a function +// type that takes N arguments. 
+template +class InvokeHelper; + +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple<>&) { + return function(); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple<>&) { + return (obj_ptr->*method_ptr)(); + } +}; + +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple& args) { + return function(get<0>(args)); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple& args) { + return (obj_ptr->*method_ptr)(get<0>(args)); + } +}; + +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple& args) { + return function(get<0>(args), get<1>(args)); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple& args) { + return (obj_ptr->*method_ptr)(get<0>(args), get<1>(args)); + } +}; + +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple& args) { + return function(get<0>(args), get<1>(args), get<2>(args)); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple& args) { + return (obj_ptr->*method_ptr)(get<0>(args), get<1>(args), + get<2>(args)); + } +}; + +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple& args) { + return function(get<0>(args), get<1>(args), get<2>(args), + get<3>(args)); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple& args) { + return (obj_ptr->*method_ptr)(get<0>(args), get<1>(args), + get<2>(args), get<3>(args)); + } +}; + +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple& args) { + return function(get<0>(args), get<1>(args), 
get<2>(args), + get<3>(args), get<4>(args)); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple& args) { + return (obj_ptr->*method_ptr)(get<0>(args), get<1>(args), + get<2>(args), get<3>(args), get<4>(args)); + } +}; + +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple& args) { + return function(get<0>(args), get<1>(args), get<2>(args), + get<3>(args), get<4>(args), get<5>(args)); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple& args) { + return (obj_ptr->*method_ptr)(get<0>(args), get<1>(args), + get<2>(args), get<3>(args), get<4>(args), get<5>(args)); + } +}; + +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple& args) { + return function(get<0>(args), get<1>(args), get<2>(args), + get<3>(args), get<4>(args), get<5>(args), get<6>(args)); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple& args) { + return (obj_ptr->*method_ptr)(get<0>(args), get<1>(args), + get<2>(args), get<3>(args), get<4>(args), get<5>(args), + get<6>(args)); + } +}; + +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple& args) { + return function(get<0>(args), get<1>(args), get<2>(args), + get<3>(args), get<4>(args), get<5>(args), get<6>(args), + get<7>(args)); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple& args) { + return (obj_ptr->*method_ptr)(get<0>(args), get<1>(args), + get<2>(args), get<3>(args), get<4>(args), get<5>(args), + get<6>(args), get<7>(args)); + } +}; + +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple& args) { + return function(get<0>(args), get<1>(args), get<2>(args), + get<3>(args), get<4>(args), 
get<5>(args), get<6>(args), + get<7>(args), get<8>(args)); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple& args) { + return (obj_ptr->*method_ptr)(get<0>(args), get<1>(args), + get<2>(args), get<3>(args), get<4>(args), get<5>(args), + get<6>(args), get<7>(args), get<8>(args)); + } +}; + +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple& args) { + return function(get<0>(args), get<1>(args), get<2>(args), + get<3>(args), get<4>(args), get<5>(args), get<6>(args), + get<7>(args), get<8>(args), get<9>(args)); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple& args) { + return (obj_ptr->*method_ptr)(get<0>(args), get<1>(args), + get<2>(args), get<3>(args), get<4>(args), get<5>(args), + get<6>(args), get<7>(args), get<8>(args), get<9>(args)); + } +}; + +// An INTERNAL macro for extracting the type of a tuple field. It's +// subject to change without notice - DO NOT USE IN USER CODE! +#define GMOCK_FIELD_(Tuple, N) \ + typename ::testing::tuple_element::type + +// SelectArgs::type is the +// type of an n-ary function whose i-th (1-based) argument type is the +// k{i}-th (0-based) field of ArgumentTuple, which must be a tuple +// type, and whose return type is Result. For example, +// SelectArgs, 0, 3>::type +// is int(bool, long). +// +// SelectArgs::Select(args) +// returns the selected fields (k1, k2, ..., k_n) of args as a tuple. +// For example, +// SelectArgs, 2, 0>::Select( +// ::testing::make_tuple(true, 'a', 2.5)) +// returns tuple (2.5, true). +// +// The numbers in list k1, k2, ..., k_n must be >= 0, where n can be +// in the range [0, 10]. Duplicates are allowed and they don't have +// to be in an ascending or descending order. 
+ +template +class SelectArgs { + public: + typedef Result type(GMOCK_FIELD_(ArgumentTuple, k1), + GMOCK_FIELD_(ArgumentTuple, k2), GMOCK_FIELD_(ArgumentTuple, k3), + GMOCK_FIELD_(ArgumentTuple, k4), GMOCK_FIELD_(ArgumentTuple, k5), + GMOCK_FIELD_(ArgumentTuple, k6), GMOCK_FIELD_(ArgumentTuple, k7), + GMOCK_FIELD_(ArgumentTuple, k8), GMOCK_FIELD_(ArgumentTuple, k9), + GMOCK_FIELD_(ArgumentTuple, k10)); + typedef typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& args) { + return SelectedArgs(get(args), get(args), get(args), + get(args), get(args), get(args), get(args), + get(args), get(args), get(args)); + } +}; + +template +class SelectArgs { + public: + typedef Result type(); + typedef typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& /* args */) { + return SelectedArgs(); + } +}; + +template +class SelectArgs { + public: + typedef Result type(GMOCK_FIELD_(ArgumentTuple, k1)); + typedef typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& args) { + return SelectedArgs(get(args)); + } +}; + +template +class SelectArgs { + public: + typedef Result type(GMOCK_FIELD_(ArgumentTuple, k1), + GMOCK_FIELD_(ArgumentTuple, k2)); + typedef typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& args) { + return SelectedArgs(get(args), get(args)); + } +}; + +template +class SelectArgs { + public: + typedef Result type(GMOCK_FIELD_(ArgumentTuple, k1), + GMOCK_FIELD_(ArgumentTuple, k2), GMOCK_FIELD_(ArgumentTuple, k3)); + typedef typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& args) { + return SelectedArgs(get(args), get(args), get(args)); + } +}; + +template +class SelectArgs { + public: + typedef Result type(GMOCK_FIELD_(ArgumentTuple, k1), + GMOCK_FIELD_(ArgumentTuple, k2), GMOCK_FIELD_(ArgumentTuple, k3), + GMOCK_FIELD_(ArgumentTuple, k4)); + typedef 
typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& args) { + return SelectedArgs(get(args), get(args), get(args), + get(args)); + } +}; + +template +class SelectArgs { + public: + typedef Result type(GMOCK_FIELD_(ArgumentTuple, k1), + GMOCK_FIELD_(ArgumentTuple, k2), GMOCK_FIELD_(ArgumentTuple, k3), + GMOCK_FIELD_(ArgumentTuple, k4), GMOCK_FIELD_(ArgumentTuple, k5)); + typedef typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& args) { + return SelectedArgs(get(args), get(args), get(args), + get(args), get(args)); + } +}; + +template +class SelectArgs { + public: + typedef Result type(GMOCK_FIELD_(ArgumentTuple, k1), + GMOCK_FIELD_(ArgumentTuple, k2), GMOCK_FIELD_(ArgumentTuple, k3), + GMOCK_FIELD_(ArgumentTuple, k4), GMOCK_FIELD_(ArgumentTuple, k5), + GMOCK_FIELD_(ArgumentTuple, k6)); + typedef typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& args) { + return SelectedArgs(get(args), get(args), get(args), + get(args), get(args), get(args)); + } +}; + +template +class SelectArgs { + public: + typedef Result type(GMOCK_FIELD_(ArgumentTuple, k1), + GMOCK_FIELD_(ArgumentTuple, k2), GMOCK_FIELD_(ArgumentTuple, k3), + GMOCK_FIELD_(ArgumentTuple, k4), GMOCK_FIELD_(ArgumentTuple, k5), + GMOCK_FIELD_(ArgumentTuple, k6), GMOCK_FIELD_(ArgumentTuple, k7)); + typedef typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& args) { + return SelectedArgs(get(args), get(args), get(args), + get(args), get(args), get(args), get(args)); + } +}; + +template +class SelectArgs { + public: + typedef Result type(GMOCK_FIELD_(ArgumentTuple, k1), + GMOCK_FIELD_(ArgumentTuple, k2), GMOCK_FIELD_(ArgumentTuple, k3), + GMOCK_FIELD_(ArgumentTuple, k4), GMOCK_FIELD_(ArgumentTuple, k5), + GMOCK_FIELD_(ArgumentTuple, k6), GMOCK_FIELD_(ArgumentTuple, k7), + GMOCK_FIELD_(ArgumentTuple, k8)); + typedef typename 
Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& args) { + return SelectedArgs(get(args), get(args), get(args), + get(args), get(args), get(args), get(args), + get(args)); + } +}; + +template +class SelectArgs { + public: + typedef Result type(GMOCK_FIELD_(ArgumentTuple, k1), + GMOCK_FIELD_(ArgumentTuple, k2), GMOCK_FIELD_(ArgumentTuple, k3), + GMOCK_FIELD_(ArgumentTuple, k4), GMOCK_FIELD_(ArgumentTuple, k5), + GMOCK_FIELD_(ArgumentTuple, k6), GMOCK_FIELD_(ArgumentTuple, k7), + GMOCK_FIELD_(ArgumentTuple, k8), GMOCK_FIELD_(ArgumentTuple, k9)); + typedef typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& args) { + return SelectedArgs(get(args), get(args), get(args), + get(args), get(args), get(args), get(args), + get(args), get(args)); + } +}; + +#undef GMOCK_FIELD_ + +// Implements the WithArgs action. +template +class WithArgsAction { + public: + explicit WithArgsAction(const InnerAction& action) : action_(action) {} + + template + operator Action() const { return MakeAction(new Impl(action_)); } + + private: + template + class Impl : public ActionInterface { + public: + typedef typename Function::Result Result; + typedef typename Function::ArgumentTuple ArgumentTuple; + + explicit Impl(const InnerAction& action) : action_(action) {} + + virtual Result Perform(const ArgumentTuple& args) { + return action_.Perform(SelectArgs::Select(args)); + } + + private: + typedef typename SelectArgs::type InnerFunctionType; + + Action action_; + }; + + const InnerAction action_; + + GTEST_DISALLOW_ASSIGN_(WithArgsAction); +}; + +// A macro from the ACTION* family (defined later in this file) +// defines an action that can be used in a mock function. Typically, +// these actions only care about a subset of the arguments of the mock +// function. 
For example, if such an action only uses the second +// argument, it can be used in any mock function that takes >= 2 +// arguments where the type of the second argument is compatible. +// +// Therefore, the action implementation must be prepared to take more +// arguments than it needs. The ExcessiveArg type is used to +// represent those excessive arguments. In order to keep the compiler +// error messages tractable, we define it in the testing namespace +// instead of testing::internal. However, this is an INTERNAL TYPE +// and subject to change without notice, so a user MUST NOT USE THIS +// TYPE DIRECTLY. +struct ExcessiveArg {}; + +// A helper class needed for implementing the ACTION* macros. +template +class ActionHelper { + public: + static Result Perform(Impl* impl, const ::testing::tuple<>& args) { + return impl->template gmock_PerformImpl<>(args, ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::testing::tuple& args) { + return impl->template gmock_PerformImpl(args, get<0>(args), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::testing::tuple& args) { + return impl->template gmock_PerformImpl(args, get<0>(args), + get<1>(args), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::testing::tuple& args) { + return impl->template gmock_PerformImpl(args, get<0>(args), + get<1>(args), get<2>(args), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const 
::testing::tuple& args) { + return impl->template gmock_PerformImpl(args, get<0>(args), + get<1>(args), get<2>(args), get<3>(args), ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::testing::tuple& args) { + return impl->template gmock_PerformImpl(args, + get<0>(args), get<1>(args), get<2>(args), get<3>(args), get<4>(args), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::testing::tuple& args) { + return impl->template gmock_PerformImpl(args, + get<0>(args), get<1>(args), get<2>(args), get<3>(args), get<4>(args), + get<5>(args), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::testing::tuple& args) { + return impl->template gmock_PerformImpl(args, + get<0>(args), get<1>(args), get<2>(args), get<3>(args), get<4>(args), + get<5>(args), get<6>(args), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::testing::tuple& args) { + return impl->template gmock_PerformImpl(args, get<0>(args), get<1>(args), get<2>(args), get<3>(args), + get<4>(args), get<5>(args), get<6>(args), get<7>(args), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::testing::tuple& args) { + return impl->template gmock_PerformImpl(args, get<0>(args), get<1>(args), get<2>(args), get<3>(args), + get<4>(args), get<5>(args), get<6>(args), get<7>(args), get<8>(args), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::testing::tuple& args) { + return impl->template gmock_PerformImpl(args, get<0>(args), get<1>(args), get<2>(args), get<3>(args), + get<4>(args), get<5>(args), get<6>(args), get<7>(args), get<8>(args), + get<9>(args)); + } +}; + +} // namespace internal + +// Various overloads for 
Invoke(). + +// WithArgs(an_action) creates an action that passes +// the selected arguments of the mock function to an_action and +// performs it. It serves as an adaptor between actions with +// different argument lists. C++ doesn't support default arguments for +// function templates, so we have to overload it. +template +inline internal::WithArgsAction +WithArgs(const InnerAction& action) { + return internal::WithArgsAction(action); +} + +template +inline internal::WithArgsAction +WithArgs(const InnerAction& action) { + return internal::WithArgsAction(action); +} + +template +inline internal::WithArgsAction +WithArgs(const InnerAction& action) { + return internal::WithArgsAction(action); +} + +template +inline internal::WithArgsAction +WithArgs(const InnerAction& action) { + return internal::WithArgsAction(action); +} + +template +inline internal::WithArgsAction +WithArgs(const InnerAction& action) { + return internal::WithArgsAction(action); +} + +template +inline internal::WithArgsAction +WithArgs(const InnerAction& action) { + return internal::WithArgsAction(action); +} + +template +inline internal::WithArgsAction +WithArgs(const InnerAction& action) { + return internal::WithArgsAction(action); +} + +template +inline internal::WithArgsAction +WithArgs(const InnerAction& action) { + return internal::WithArgsAction(action); +} + +template +inline internal::WithArgsAction +WithArgs(const InnerAction& action) { + return internal::WithArgsAction(action); +} + +template +inline internal::WithArgsAction +WithArgs(const InnerAction& action) { + return internal::WithArgsAction(action); +} + +// Creates an action that does actions a1, a2, ..., sequentially in +// each invocation. 
+template +inline internal::DoBothAction +DoAll(Action1 a1, Action2 a2) { + return internal::DoBothAction(a1, a2); +} + +template +inline internal::DoBothAction > +DoAll(Action1 a1, Action2 a2, Action3 a3) { + return DoAll(a1, DoAll(a2, a3)); +} + +template +inline internal::DoBothAction > > +DoAll(Action1 a1, Action2 a2, Action3 a3, Action4 a4) { + return DoAll(a1, DoAll(a2, a3, a4)); +} + +template +inline internal::DoBothAction > > > +DoAll(Action1 a1, Action2 a2, Action3 a3, Action4 a4, Action5 a5) { + return DoAll(a1, DoAll(a2, a3, a4, a5)); +} + +template +inline internal::DoBothAction > > > > +DoAll(Action1 a1, Action2 a2, Action3 a3, Action4 a4, Action5 a5, Action6 a6) { + return DoAll(a1, DoAll(a2, a3, a4, a5, a6)); +} + +template +inline internal::DoBothAction > > > > > +DoAll(Action1 a1, Action2 a2, Action3 a3, Action4 a4, Action5 a5, Action6 a6, + Action7 a7) { + return DoAll(a1, DoAll(a2, a3, a4, a5, a6, a7)); +} + +template +inline internal::DoBothAction > > > > > > +DoAll(Action1 a1, Action2 a2, Action3 a3, Action4 a4, Action5 a5, Action6 a6, + Action7 a7, Action8 a8) { + return DoAll(a1, DoAll(a2, a3, a4, a5, a6, a7, a8)); +} + +template +inline internal::DoBothAction > > > > > > > +DoAll(Action1 a1, Action2 a2, Action3 a3, Action4 a4, Action5 a5, Action6 a6, + Action7 a7, Action8 a8, Action9 a9) { + return DoAll(a1, DoAll(a2, a3, a4, a5, a6, a7, a8, a9)); +} + +template +inline internal::DoBothAction > > > > > > > > +DoAll(Action1 a1, Action2 a2, Action3 a3, Action4 a4, Action5 a5, Action6 a6, + Action7 a7, Action8 a8, Action9 a9, Action10 a10) { + return DoAll(a1, DoAll(a2, a3, a4, a5, a6, a7, a8, a9, a10)); +} + +} // namespace testing + +// The ACTION* family of macros can be used in a namespace scope to +// define custom actions easily. The syntax: +// +// ACTION(name) { statements; } +// +// will define an action with the given name that executes the +// statements. 
The value returned by the statements will be used as +// the return value of the action. Inside the statements, you can +// refer to the K-th (0-based) argument of the mock function by +// 'argK', and refer to its type by 'argK_type'. For example: +// +// ACTION(IncrementArg1) { +// arg1_type temp = arg1; +// return ++(*temp); +// } +// +// allows you to write +// +// ...WillOnce(IncrementArg1()); +// +// You can also refer to the entire argument tuple and its type by +// 'args' and 'args_type', and refer to the mock function type and its +// return type by 'function_type' and 'return_type'. +// +// Note that you don't need to specify the types of the mock function +// arguments. However rest assured that your code is still type-safe: +// you'll get a compiler error if *arg1 doesn't support the ++ +// operator, or if the type of ++(*arg1) isn't compatible with the +// mock function's return type, for example. +// +// Sometimes you'll want to parameterize the action. For that you can use +// another macro: +// +// ACTION_P(name, param_name) { statements; } +// +// For example: +// +// ACTION_P(Add, n) { return arg0 + n; } +// +// will allow you to write: +// +// ...WillOnce(Add(5)); +// +// Note that you don't need to provide the type of the parameter +// either. If you need to reference the type of a parameter named +// 'foo', you can write 'foo_type'. For example, in the body of +// ACTION_P(Add, n) above, you can write 'n_type' to refer to the type +// of 'n'. +// +// We also provide ACTION_P2, ACTION_P3, ..., up to ACTION_P10 to support +// multi-parameter actions. +// +// For the purpose of typing, you can view +// +// ACTION_Pk(Foo, p1, ..., pk) { ... } +// +// as shorthand for +// +// template +// FooActionPk Foo(p1_type p1, ..., pk_type pk) { ... 
} +// +// In particular, you can provide the template type arguments +// explicitly when invoking Foo(), as in Foo(5, false); +// although usually you can rely on the compiler to infer the types +// for you automatically. You can assign the result of expression +// Foo(p1, ..., pk) to a variable of type FooActionPk. This can be useful when composing actions. +// +// You can also overload actions with different numbers of parameters: +// +// ACTION_P(Plus, a) { ... } +// ACTION_P2(Plus, a, b) { ... } +// +// While it's tempting to always use the ACTION* macros when defining +// a new action, you should also consider implementing ActionInterface +// or using MakePolymorphicAction() instead, especially if you need to +// use the action a lot. While these approaches require more work, +// they give you more control on the types of the mock function +// arguments and the action parameters, which in general leads to +// better compiler error messages that pay off in the long run. They +// also allow overloading actions based on parameter types (as opposed +// to just based on the number of parameters). +// +// CAVEAT: +// +// ACTION*() can only be used in a namespace scope. The reason is +// that C++ doesn't yet allow function-local types to be used to +// instantiate templates. The up-coming C++0x standard will fix this. +// Once that's done, we'll consider supporting using ACTION*() inside +// a function. +// +// MORE INFORMATION: +// +// To learn more about using these macros, please search for 'ACTION' +// on https://github.com/google/googletest/blob/master/googlemock/docs/CookBook.md + +// An internal macro needed for implementing ACTION*(). 
+#define GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_\ + const args_type& args GTEST_ATTRIBUTE_UNUSED_, \ + arg0_type arg0 GTEST_ATTRIBUTE_UNUSED_, \ + arg1_type arg1 GTEST_ATTRIBUTE_UNUSED_, \ + arg2_type arg2 GTEST_ATTRIBUTE_UNUSED_, \ + arg3_type arg3 GTEST_ATTRIBUTE_UNUSED_, \ + arg4_type arg4 GTEST_ATTRIBUTE_UNUSED_, \ + arg5_type arg5 GTEST_ATTRIBUTE_UNUSED_, \ + arg6_type arg6 GTEST_ATTRIBUTE_UNUSED_, \ + arg7_type arg7 GTEST_ATTRIBUTE_UNUSED_, \ + arg8_type arg8 GTEST_ATTRIBUTE_UNUSED_, \ + arg9_type arg9 GTEST_ATTRIBUTE_UNUSED_ + +// Sometimes you want to give an action explicit template parameters +// that cannot be inferred from its value parameters. ACTION() and +// ACTION_P*() don't support that. ACTION_TEMPLATE() remedies that +// and can be viewed as an extension to ACTION() and ACTION_P*(). +// +// The syntax: +// +// ACTION_TEMPLATE(ActionName, +// HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m), +// AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; } +// +// defines an action template that takes m explicit template +// parameters and n value parameters. name_i is the name of the i-th +// template parameter, and kind_i specifies whether it's a typename, +// an integral constant, or a template. p_i is the name of the i-th +// value parameter. +// +// Example: +// +// // DuplicateArg(output) converts the k-th argument of the mock +// // function to type T and copies it to *output. +// ACTION_TEMPLATE(DuplicateArg, +// HAS_2_TEMPLATE_PARAMS(int, k, typename, T), +// AND_1_VALUE_PARAMS(output)) { +// *output = T(::testing::get(args)); +// } +// ... +// int n; +// EXPECT_CALL(mock, Foo(_, _)) +// .WillOnce(DuplicateArg<1, unsigned char>(&n)); +// +// To create an instance of an action template, write: +// +// ActionName(v1, ..., v_n) +// +// where the ts are the template arguments and the vs are the value +// arguments. The value argument types are inferred by the compiler. 
+// If you want to explicitly specify the value argument types, you can +// provide additional template arguments: +// +// ActionName(v1, ..., v_n) +// +// where u_i is the desired type of v_i. +// +// ACTION_TEMPLATE and ACTION/ACTION_P* can be overloaded on the +// number of value parameters, but not on the number of template +// parameters. Without the restriction, the meaning of the following +// is unclear: +// +// OverloadedAction(x); +// +// Are we using a single-template-parameter action where 'bool' refers +// to the type of x, or are we using a two-template-parameter action +// where the compiler is asked to infer the type of x? +// +// Implementation notes: +// +// GMOCK_INTERNAL_*_HAS_m_TEMPLATE_PARAMS and +// GMOCK_INTERNAL_*_AND_n_VALUE_PARAMS are internal macros for +// implementing ACTION_TEMPLATE. The main trick we use is to create +// new macro invocations when expanding a macro. For example, we have +// +// #define ACTION_TEMPLATE(name, template_params, value_params) +// ... GMOCK_INTERNAL_DECL_##template_params ... +// +// which causes ACTION_TEMPLATE(..., HAS_1_TEMPLATE_PARAMS(typename, T), ...) +// to expand to +// +// ... GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(typename, T) ... +// +// Since GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS is a macro, the +// preprocessor will continue to expand it to +// +// ... typename T ... +// +// This technique conforms to the C++ standard and is portable. It +// allows us to implement action templates using O(N) code, where N is +// the maximum number of template/value parameters supported. Without +// using it, we'd have to devote O(N^2) amount of code to implement all +// combinations of m and n. + +// Declares the template parameters. 
+#define GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(kind0, name0) kind0 name0 +#define GMOCK_INTERNAL_DECL_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, \ + name1) kind0 name0, kind1 name1 +#define GMOCK_INTERNAL_DECL_HAS_3_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2) kind0 name0, kind1 name1, kind2 name2 +#define GMOCK_INTERNAL_DECL_HAS_4_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3) kind0 name0, kind1 name1, kind2 name2, \ + kind3 name3 +#define GMOCK_INTERNAL_DECL_HAS_5_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4) kind0 name0, kind1 name1, \ + kind2 name2, kind3 name3, kind4 name4 +#define GMOCK_INTERNAL_DECL_HAS_6_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5) kind0 name0, \ + kind1 name1, kind2 name2, kind3 name3, kind4 name4, kind5 name5 +#define GMOCK_INTERNAL_DECL_HAS_7_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ + name6) kind0 name0, kind1 name1, kind2 name2, kind3 name3, kind4 name4, \ + kind5 name5, kind6 name6 +#define GMOCK_INTERNAL_DECL_HAS_8_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ + kind7, name7) kind0 name0, kind1 name1, kind2 name2, kind3 name3, \ + kind4 name4, kind5 name5, kind6 name6, kind7 name7 +#define GMOCK_INTERNAL_DECL_HAS_9_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ + kind7, name7, kind8, name8) kind0 name0, kind1 name1, kind2 name2, \ + kind3 name3, kind4 name4, kind5 name5, kind6 name6, kind7 name7, \ + kind8 name8 +#define GMOCK_INTERNAL_DECL_HAS_10_TEMPLATE_PARAMS(kind0, name0, kind1, \ + name1, kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ + name6, kind7, name7, kind8, name8, kind9, name9) kind0 name0, \ + kind1 name1, kind2 name2, kind3 name3, kind4
name4, kind5 name5, \ + kind6 name6, kind7 name7, kind8 name8, kind9 name9 + +// Lists the names of the template parameters (the kinds are dropped). +#define GMOCK_INTERNAL_LIST_HAS_1_TEMPLATE_PARAMS(kind0, name0) name0 +#define GMOCK_INTERNAL_LIST_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, \ + name1) name0, name1 +#define GMOCK_INTERNAL_LIST_HAS_3_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2) name0, name1, name2 +#define GMOCK_INTERNAL_LIST_HAS_4_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3) name0, name1, name2, name3 +#define GMOCK_INTERNAL_LIST_HAS_5_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4) name0, name1, name2, name3, \ + name4 +#define GMOCK_INTERNAL_LIST_HAS_6_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5) name0, name1, \ + name2, name3, name4, name5 +#define GMOCK_INTERNAL_LIST_HAS_7_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ + name6) name0, name1, name2, name3, name4, name5, name6 +#define GMOCK_INTERNAL_LIST_HAS_8_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ + kind7, name7) name0, name1, name2, name3, name4, name5, name6, name7 +#define GMOCK_INTERNAL_LIST_HAS_9_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ + kind7, name7, kind8, name8) name0, name1, name2, name3, name4, name5, \ + name6, name7, name8 +#define GMOCK_INTERNAL_LIST_HAS_10_TEMPLATE_PARAMS(kind0, name0, kind1, \ + name1, kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ + name6, kind7, name7, kind8, name8, kind9, name9) name0, name1, name2, \ + name3, name4, name5, name6, name7, name8, name9 + +// Declares one `typename p_i##_type` template parameter per value parameter. +// Every non-empty expansion starts with a comma; the 0-parameter form is empty.
+#define GMOCK_INTERNAL_DECL_TYPE_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_DECL_TYPE_AND_1_VALUE_PARAMS(p0) , typename p0##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_2_VALUE_PARAMS(p0, p1) , \ + typename p0##_type, typename p1##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_3_VALUE_PARAMS(p0, p1, p2) , \ + typename p0##_type, typename p1##_type, typename p2##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_4_VALUE_PARAMS(p0, p1, p2, p3) , \ + typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) , \ + typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type, typename p4##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) , \ + typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type, typename p4##_type, typename p5##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6) , typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type, typename p4##_type, typename p5##_type, \ + typename p6##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7) , typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type, typename p4##_type, typename p5##_type, \ + typename p6##_type, typename p7##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8) , typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type, typename p4##_type, typename p5##_type, \ + typename p6##_type, typename p7##_type, typename p8##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8, p9) , typename p0##_type, typename p1##_type, \ + typename p2##_type, typename p3##_type, typename p4##_type, \ + typename p5##_type, typename p6##_type, typename p7##_type, \ + typename p8##_type, typename p9##_type
+ +// Expands to a constructor parameter list plus member-initializer list: +// each argument gmock_p_i initializes the member p_i (the 0-parameter form is `()`). +#define GMOCK_INTERNAL_INIT_AND_0_VALUE_PARAMS()\ + () +#define GMOCK_INTERNAL_INIT_AND_1_VALUE_PARAMS(p0)\ + (p0##_type gmock_p0) : p0(gmock_p0) +#define GMOCK_INTERNAL_INIT_AND_2_VALUE_PARAMS(p0, p1)\ + (p0##_type gmock_p0, p1##_type gmock_p1) : p0(gmock_p0), p1(gmock_p1) +#define GMOCK_INTERNAL_INIT_AND_3_VALUE_PARAMS(p0, p1, p2)\ + (p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2) +#define GMOCK_INTERNAL_INIT_AND_4_VALUE_PARAMS(p0, p1, p2, p3)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3) +#define GMOCK_INTERNAL_INIT_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4) : p0(gmock_p0), p1(gmock_p1), \ + p2(gmock_p2), p3(gmock_p3), p4(gmock_p4) +#define GMOCK_INTERNAL_INIT_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5) +#define GMOCK_INTERNAL_INIT_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6) +#define GMOCK_INTERNAL_INIT_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7) : p0(gmock_p0), p1(gmock_p1), \ + p2(gmock_p2), p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), \ + p7(gmock_p7) +#define GMOCK_INTERNAL_INIT_AND_9_VALUE_PARAMS(p0,
p1, p2, p3, p4, p5, p6, \ + p7, p8)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), p7(gmock_p7), \ + p8(gmock_p8) +#define GMOCK_INTERNAL_INIT_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8, p9)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8, \ + p9##_type gmock_p9) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), p7(gmock_p7), \ + p8(gmock_p8), p9(gmock_p9) + +// Declares the fields for storing the value parameters: one +// `p_i##_type p_i;` member per value parameter. +#define GMOCK_INTERNAL_DEFN_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_DEFN_AND_1_VALUE_PARAMS(p0) p0##_type p0; +#define GMOCK_INTERNAL_DEFN_AND_2_VALUE_PARAMS(p0, p1) p0##_type p0; \ + p1##_type p1; +#define GMOCK_INTERNAL_DEFN_AND_3_VALUE_PARAMS(p0, p1, p2) p0##_type p0; \ + p1##_type p1; p2##_type p2; +#define GMOCK_INTERNAL_DEFN_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0##_type p0; \ + p1##_type p1; p2##_type p2; p3##_type p3; +#define GMOCK_INTERNAL_DEFN_AND_5_VALUE_PARAMS(p0, p1, p2, p3, \ + p4) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; +#define GMOCK_INTERNAL_DEFN_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, \ + p5) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \ + p5##_type p5; +#define GMOCK_INTERNAL_DEFN_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \ + p5##_type p5; p6##_type p6; +#define GMOCK_INTERNAL_DEFN_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \ + p5##_type p5; p6##_type p6;
p7##_type p7; +#define GMOCK_INTERNAL_DEFN_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; \ + p4##_type p4; p5##_type p5; p6##_type p6; p7##_type p7; p8##_type p8; +#define GMOCK_INTERNAL_DEFN_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8, p9) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; \ + p4##_type p4; p5##_type p5; p6##_type p6; p7##_type p7; p8##_type p8; \ + p9##_type p9; + +// Lists the names of the value parameters, comma-separated. +#define GMOCK_INTERNAL_LIST_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_LIST_AND_1_VALUE_PARAMS(p0) p0 +#define GMOCK_INTERNAL_LIST_AND_2_VALUE_PARAMS(p0, p1) p0, p1 +#define GMOCK_INTERNAL_LIST_AND_3_VALUE_PARAMS(p0, p1, p2) p0, p1, p2 +#define GMOCK_INTERNAL_LIST_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0, p1, p2, p3 +#define GMOCK_INTERNAL_LIST_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) p0, p1, \ + p2, p3, p4 +#define GMOCK_INTERNAL_LIST_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) p0, \ + p1, p2, p3, p4, p5 +#define GMOCK_INTERNAL_LIST_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6) p0, p1, p2, p3, p4, p5, p6 +#define GMOCK_INTERNAL_LIST_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7) p0, p1, p2, p3, p4, p5, p6, p7 +#define GMOCK_INTERNAL_LIST_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8) p0, p1, p2, p3, p4, p5, p6, p7, p8 +#define GMOCK_INTERNAL_LIST_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8, p9) p0, p1, p2, p3, p4, p5, p6, p7, p8, p9 + +// Lists the value parameter types (p_i##_type). Every non-empty expansion +// starts with a comma; the 0-parameter form is empty.
+#define GMOCK_INTERNAL_LIST_TYPE_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_LIST_TYPE_AND_1_VALUE_PARAMS(p0) , p0##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_2_VALUE_PARAMS(p0, p1) , p0##_type, \ + p1##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_3_VALUE_PARAMS(p0, p1, p2) , p0##_type, \ + p1##_type, p2##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_4_VALUE_PARAMS(p0, p1, p2, p3) , \ + p0##_type, p1##_type, p2##_type, p3##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) , \ + p0##_type, p1##_type, p2##_type, p3##_type, p4##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) , \ + p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type, \ + p6##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, \ + p5##_type, p6##_type, p7##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, \ + p5##_type, p6##_type, p7##_type, p8##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8, p9) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, \ + p5##_type, p6##_type, p7##_type, p8##_type, p9##_type + +// Declares the value parameters as a comma-separated `p_i##_type p_i` list.
+#define GMOCK_INTERNAL_DECL_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_DECL_AND_1_VALUE_PARAMS(p0) p0##_type p0 +#define GMOCK_INTERNAL_DECL_AND_2_VALUE_PARAMS(p0, p1) p0##_type p0, \ + p1##_type p1 +#define GMOCK_INTERNAL_DECL_AND_3_VALUE_PARAMS(p0, p1, p2) p0##_type p0, \ + p1##_type p1, p2##_type p2 +#define GMOCK_INTERNAL_DECL_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0##_type p0, \ + p1##_type p1, p2##_type p2, p3##_type p3 +#define GMOCK_INTERNAL_DECL_AND_5_VALUE_PARAMS(p0, p1, p2, p3, \ + p4) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4 +#define GMOCK_INTERNAL_DECL_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, \ + p5) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \ + p5##_type p5 +#define GMOCK_INTERNAL_DECL_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \ + p5##_type p5, p6##_type p6 +#define GMOCK_INTERNAL_DECL_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \ + p5##_type p5, p6##_type p6, p7##_type p7 +#define GMOCK_INTERNAL_DECL_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8 +#define GMOCK_INTERNAL_DECL_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8, p9) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8, \ + p9##_type p9 + +// The suffix of the class template implementing the action template: +// empty for 0 value parameters, P for 1, and Pn for n = 2..10.
+#define GMOCK_INTERNAL_COUNT_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_COUNT_AND_1_VALUE_PARAMS(p0) P +#define GMOCK_INTERNAL_COUNT_AND_2_VALUE_PARAMS(p0, p1) P2 +#define GMOCK_INTERNAL_COUNT_AND_3_VALUE_PARAMS(p0, p1, p2) P3 +#define GMOCK_INTERNAL_COUNT_AND_4_VALUE_PARAMS(p0, p1, p2, p3) P4 +#define GMOCK_INTERNAL_COUNT_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) P5 +#define GMOCK_INTERNAL_COUNT_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) P6 +#define GMOCK_INTERNAL_COUNT_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6) P7 +#define GMOCK_INTERNAL_COUNT_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7) P8 +#define GMOCK_INTERNAL_COUNT_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8) P9 +#define GMOCK_INTERNAL_COUNT_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8, p9) P10 + +// The name of the class template implementing the action template. +#define GMOCK_ACTION_CLASS_(name, value_params)\ + GTEST_CONCAT_TOKEN_(name##Action, GMOCK_INTERNAL_COUNT_##value_params) + +#define ACTION_TEMPLATE(name, template_params, value_params)\ + template \ + class GMOCK_ACTION_CLASS_(name, value_params) {\ + public:\ + explicit GMOCK_ACTION_CLASS_(name, value_params)\ + GMOCK_INTERNAL_INIT_##value_params {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + explicit gmock_Impl GMOCK_INTERNAL_INIT_##value_params {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ + GMOCK_INTERNAL_DEFN_##value_params\ + private:\ + 
GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(\ + new gmock_Impl(GMOCK_INTERNAL_LIST_##value_params));\ + }\ + GMOCK_INTERNAL_DEFN_##value_params\ + private:\ + GTEST_DISALLOW_ASSIGN_(GMOCK_ACTION_CLASS_(name, value_params));\ + };\ + template \ + inline GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params> name(\ + GMOCK_INTERNAL_DECL_##value_params) {\ + return GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params>(\ + GMOCK_INTERNAL_LIST_##value_params);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params>::gmock_Impl::\ + gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION(name)\ + class name##Action {\ + public:\ + name##Action() {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl() {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl());\ + }\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##Action);\ + };\ + inline name##Action name() {\ + return 
name##Action();\ + }\ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##Action::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P(name, p0)\ + template \ + class name##ActionP {\ + public:\ + explicit name##ActionP(p0##_type gmock_p0) : p0(gmock_p0) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + explicit gmock_Impl(p0##_type gmock_p0) : p0(gmock_p0) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ + p0##_type p0;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0));\ + }\ + p0##_type p0;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP);\ + };\ + template \ + inline name##ActionP name(p0##_type p0) {\ + return name##ActionP(p0);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P2(name, p0, p1)\ + template \ + class name##ActionP2 {\ + public:\ + name##ActionP2(p0##_type gmock_p0, p1##_type gmock_p1) : p0(gmock_p0), \ + p1(gmock_p1) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ 
+ args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1) : p0(gmock_p0), \ + p1(gmock_p1) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP2);\ + };\ + template \ + inline name##ActionP2 name(p0##_type p0, \ + p1##_type p1) {\ + return name##ActionP2(p0, p1);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP2::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P3(name, p0, p1, p2)\ + template \ + class name##ActionP3 {\ + public:\ + name##ActionP3(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type 
arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP3);\ + };\ + template \ + inline name##ActionP3 name(p0##_type p0, \ + p1##_type p1, p2##_type p2) {\ + return name##ActionP3(p0, p1, p2);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP3::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P4(name, p0, p1, p2, p3)\ + template \ + class name##ActionP4 {\ + public:\ + name##ActionP4(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3) : p0(gmock_p0), p1(gmock_p1), \ + p2(gmock_p2), p3(gmock_p3) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new 
gmock_Impl(p0, p1, p2, p3));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP4);\ + };\ + template \ + inline name##ActionP4 name(p0##_type p0, p1##_type p1, p2##_type p2, \ + p3##_type p3) {\ + return name##ActionP4(p0, p1, \ + p2, p3);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP4::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P5(name, p0, p1, p2, p3, p4)\ + template \ + class name##ActionP5 {\ + public:\ + name##ActionP5(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, \ + p4##_type gmock_p4) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4) : p0(gmock_p0), \ + p1(gmock_p1), p2(gmock_p2), p3(gmock_p3), p4(gmock_p4) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + 
private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP5);\ + };\ + template \ + inline name##ActionP5 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4) {\ + return name##ActionP5(p0, p1, p2, p3, p4);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP5::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P6(name, p0, p1, p2, p3, p4, p5)\ + template \ + class name##ActionP6 {\ + public:\ + name##ActionP6(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + 
p5##_type p5;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP6);\ + };\ + template \ + inline name##ActionP6 name(p0##_type p0, p1##_type p1, p2##_type p2, \ + p3##_type p3, p4##_type p4, p5##_type p5) {\ + return name##ActionP6(p0, p1, p2, p3, p4, p5);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP6::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P7(name, p0, p1, p2, p3, p4, p5, p6)\ + template \ + class name##ActionP7 {\ + public:\ + name##ActionP7(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6) : p0(gmock_p0), p1(gmock_p1), \ + p2(gmock_p2), p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), \ + p6(gmock_p6) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new 
gmock_Impl(p0, p1, p2, p3, p4, p5, \ + p6));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP7);\ + };\ + template \ + inline name##ActionP7 name(p0##_type p0, p1##_type p1, \ + p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ + p6##_type p6) {\ + return name##ActionP7(p0, p1, p2, p3, p4, p5, p6);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP7::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P8(name, p0, p1, p2, p3, p4, p5, p6, p7)\ + template \ + class name##ActionP8 {\ + public:\ + name##ActionP8(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, \ + p7##_type gmock_p7) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), \ + p7(gmock_p7) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7) : p0(gmock_p0), \ + p1(gmock_p1), p2(gmock_p2), p3(gmock_p3), p4(gmock_p4), \ + p5(gmock_p5), p6(gmock_p6), p7(gmock_p7) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ 
+ p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5, \ + p6, p7));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP8);\ + };\ + template \ + inline name##ActionP8 name(p0##_type p0, \ + p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ + p6##_type p6, p7##_type p7) {\ + return name##ActionP8(p0, p1, p2, p3, p4, p5, \ + p6, p7);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP8::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P9(name, p0, p1, p2, p3, p4, p5, p6, p7, p8)\ + template \ + class name##ActionP9 {\ + public:\ + name##ActionP9(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), p7(gmock_p7), \ + p8(gmock_p8) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), \ + p7(gmock_p7), 
p8(gmock_p8) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP9);\ + };\ + template \ + inline name##ActionP9 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, \ + p8##_type p8) {\ + return name##ActionP9(p0, p1, p2, \ + p3, p4, p5, p6, p7, p8);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP9::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P10(name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)\ + template \ + class name##ActionP10 {\ + public:\ + name##ActionP10(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8, p9##_type gmock_p9) : p0(gmock_p0), p1(gmock_p1), \ + p2(gmock_p2), p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), \ + p7(gmock_p7), p8(gmock_p8), p9(gmock_p9) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F 
function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8, \ + p9##_type gmock_p9) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), \ + p7(gmock_p7), p8(gmock_p8), p9(gmock_p9) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, arg0_type arg0, \ + arg1_type arg1, arg2_type arg2, arg3_type arg3, arg4_type arg4, \ + arg5_type arg5, arg6_type arg6, arg7_type arg7, arg8_type arg8, \ + arg9_type arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + p9##_type p9;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8, p9));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + p9##_type p9;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP10);\ + };\ + template \ + inline name##ActionP10 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8, \ + p9##_type p9) {\ + return name##ActionP10(p0, \ + p1, p2, p3, p4, p5, p6, p7, p8, p9);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP10::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +namespace 
testing { + + +// The ACTION*() macros trigger warning C4100 (unreferenced formal +// parameter) in MSVC with -W4. Unfortunately they cannot be fixed in +// the macro definition, as the warnings are generated when the macro +// is expanded and macro expansion cannot contain #pragma. Therefore +// we suppress them here. +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable:4100) +#endif + +// Various overloads for InvokeArgument<N>(). +// +// The InvokeArgument<N>(a1, a2, ..., a_k) action invokes the N-th +// (0-based) argument, which must be a k-ary callable, of the mock +// function, with arguments a1, a2, ..., a_k. +// +// Notes: +// +// 1. The arguments are passed by value by default. If you need to +// pass an argument by reference, wrap it inside ByRef(). For +// example, +// +// InvokeArgument<1>(5, string("Hello"), ByRef(foo)) +// +// passes 5 and string("Hello") by value, and passes foo by +// reference. +// +// 2. If the callable takes an argument by reference but ByRef() is +// not used, it will receive the reference to a copy of the value, +// instead of the original value. For example, when the 0-th +// argument of the mock function takes a const string&, the action +// +// InvokeArgument<0>(string("Hello")) +// +// makes a copy of the temporary string("Hello") object and passes a +// reference of the copy, instead of the original temporary object, +// to the callable. This makes it easy for a user to define an +// InvokeArgument action from temporary values and have it performed +// later. + +namespace internal { +namespace invoke_argument { + +// Appears in InvokeArgumentAdl's argument list to help avoid +// accidental calls to user functions of the same name. +struct AdlTag {}; + +// InvokeArgumentAdl - a helper for InvokeArgument. +// The basic overloads are provided here for generic functors. +// Overloads for other custom-callables are provided in the +// internal/custom/callback-actions.h header.
+ +template +R InvokeArgumentAdl(AdlTag, F f) { + return f(); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1) { + return f(a1); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2) { + return f(a1, a2); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3) { + return f(a1, a2, a3); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4) { + return f(a1, a2, a3, a4); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { + return f(a1, a2, a3, a4, a5); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6) { + return f(a1, a2, a3, a4, a5, a6); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, + A7 a7) { + return f(a1, a2, a3, a4, a5, a6, a7); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, + A7 a7, A8 a8) { + return f(a1, a2, a3, a4, a5, a6, a7, a8); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, + A7 a7, A8 a8, A9 a9) { + return f(a1, a2, a3, a4, a5, a6, a7, a8, a9); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, + A7 a7, A8 a8, A9 a9, A10 a10) { + return f(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10); +} +} // namespace invoke_argument +} // namespace internal + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_0_VALUE_PARAMS()) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args)); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_1_VALUE_PARAMS(p0)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args), p0); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_2_VALUE_PARAMS(p0, p1)) { + using internal::invoke_argument::InvokeArgumentAdl; + 
return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args), p0, p1); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_3_VALUE_PARAMS(p0, p1, p2)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args), p0, p1, p2); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_4_VALUE_PARAMS(p0, p1, p2, p3)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args), p0, p1, p2, p3); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args), p0, p1, p2, p3, p4); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args), p0, p1, p2, p3, p4, p5); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args), p0, p1, p2, p3, p4, p5, p6); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args), p0, p1, p2, p3, p4, p5, p6, p7); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8)) { + using internal::invoke_argument::InvokeArgumentAdl; + 
return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args), p0, p1, p2, p3, p4, p5, p6, p7, p8); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args), p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); +} + +// Various overloads for ReturnNew(). +// +// The ReturnNew(a1, a2, ..., a_k) action returns a pointer to a new +// instance of type T, constructed on the heap with constructor arguments +// a1, a2, ..., and a_k. The caller assumes ownership of the returned value. +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_0_VALUE_PARAMS()) { + return new T(); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_1_VALUE_PARAMS(p0)) { + return new T(p0); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_2_VALUE_PARAMS(p0, p1)) { + return new T(p0, p1); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_3_VALUE_PARAMS(p0, p1, p2)) { + return new T(p0, p1, p2); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_4_VALUE_PARAMS(p0, p1, p2, p3)) { + return new T(p0, p1, p2, p3); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)) { + return new T(p0, p1, p2, p3, p4); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)) { + return new T(p0, p1, p2, p3, p4, p5); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)) { + return new T(p0, p1, p2, p3, p4, p5, p6); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)) { + return new T(p0, p1, p2, p3, p4, p5, p6, 
p7); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8)) { + return new T(p0, p1, p2, p3, p4, p5, p6, p7, p8); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)) { + return new T(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); +} + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +} // namespace testing + +// Include any custom actions added by the local installation. +// We must include this header at the end to make sure it can use the +// declarations from this file. +#include "gmock/internal/custom/gmock-generated-actions.h" + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ diff --git a/tools/external/googletest/googlemock/include/gmock/gmock-generated-actions.h.pump b/tools/external/googletest/googlemock/include/gmock/gmock-generated-actions.h.pump new file mode 100644 index 00000000..712f65d6 --- /dev/null +++ b/tools/external/googletest/googlemock/include/gmock/gmock-generated-actions.h.pump @@ -0,0 +1,794 @@ +$$ -*- mode: c++; -*- +$$ This is a Pump source file. Please use Pump to convert it to +$$ gmock-generated-actions.h. +$$ +$var n = 10 $$ The maximum arity we support. +$$}} This meta comment fixes auto-indentation in editors. +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used variadic actions. + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ + +#include "gmock/gmock-actions.h" +#include "gmock/internal/gmock-port.h" + +namespace testing { +namespace internal { + +// InvokeHelper knows how to unpack an N-tuple and invoke an N-ary +// function or method with the unpacked values, where F is a function +// type that takes N arguments. 
+template +class InvokeHelper; + + +$range i 0..n +$for i [[ +$range j 1..i +$var types = [[$for j [[, typename A$j]]]] +$var as = [[$for j, [[A$j]]]] +$var args = [[$if i==0 [[]] $else [[ args]]]] +$var gets = [[$for j, [[get<$(j - 1)>(args)]]]] +template +class InvokeHelper > { + public: + template + static R Invoke(Function function, const ::testing::tuple<$as>&$args) { + return function($gets); + } + + template + static R InvokeMethod(Class* obj_ptr, + MethodPtr method_ptr, + const ::testing::tuple<$as>&$args) { + return (obj_ptr->*method_ptr)($gets); + } +}; + + +]] +// An INTERNAL macro for extracting the type of a tuple field. It's +// subject to change without notice - DO NOT USE IN USER CODE! +#define GMOCK_FIELD_(Tuple, N) \ + typename ::testing::tuple_element::type + +$range i 1..n + +// SelectArgs::type is the +// type of an n-ary function whose i-th (1-based) argument type is the +// k{i}-th (0-based) field of ArgumentTuple, which must be a tuple +// type, and whose return type is Result. For example, +// SelectArgs, 0, 3>::type +// is int(bool, long). +// +// SelectArgs::Select(args) +// returns the selected fields (k1, k2, ..., k_n) of args as a tuple. +// For example, +// SelectArgs, 2, 0>::Select( +// ::testing::make_tuple(true, 'a', 2.5)) +// returns tuple (2.5, true). +// +// The numbers in list k1, k2, ..., k_n must be >= 0, where n can be +// in the range [0, $n]. Duplicates are allowed and they don't have +// to be in an ascending or descending order. 
+ +template +class SelectArgs { + public: + typedef Result type($for i, [[GMOCK_FIELD_(ArgumentTuple, k$i)]]); + typedef typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& args) { + return SelectedArgs($for i, [[get(args)]]); + } +}; + + +$for i [[ +$range j 1..n +$range j1 1..i-1 +template +class SelectArgs { + public: + typedef Result type($for j1, [[GMOCK_FIELD_(ArgumentTuple, k$j1)]]); + typedef typename Function::ArgumentTuple SelectedArgs; + static SelectedArgs Select(const ArgumentTuple& [[]] +$if i == 1 [[/* args */]] $else [[args]]) { + return SelectedArgs($for j1, [[get(args)]]); + } +}; + + +]] +#undef GMOCK_FIELD_ + +$var ks = [[$for i, [[k$i]]]] + +// Implements the WithArgs action. +template +class WithArgsAction { + public: + explicit WithArgsAction(const InnerAction& action) : action_(action) {} + + template + operator Action() const { return MakeAction(new Impl(action_)); } + + private: + template + class Impl : public ActionInterface { + public: + typedef typename Function::Result Result; + typedef typename Function::ArgumentTuple ArgumentTuple; + + explicit Impl(const InnerAction& action) : action_(action) {} + + virtual Result Perform(const ArgumentTuple& args) { + return action_.Perform(SelectArgs::Select(args)); + } + + private: + typedef typename SelectArgs::type InnerFunctionType; + + Action action_; + }; + + const InnerAction action_; + + GTEST_DISALLOW_ASSIGN_(WithArgsAction); +}; + +// A macro from the ACTION* family (defined later in this file) +// defines an action that can be used in a mock function. Typically, +// these actions only care about a subset of the arguments of the mock +// function. For example, if such an action only uses the second +// argument, it can be used in any mock function that takes >= 2 +// arguments where the type of the second argument is compatible. +// +// Therefore, the action implementation must be prepared to take more +// arguments than it needs. 
The ExcessiveArg type is used to +// represent those excessive arguments. In order to keep the compiler +// error messages tractable, we define it in the testing namespace +// instead of testing::internal. However, this is an INTERNAL TYPE +// and subject to change without notice, so a user MUST NOT USE THIS +// TYPE DIRECTLY. +struct ExcessiveArg {}; + +// A helper class needed for implementing the ACTION* macros. +template +class ActionHelper { + public: +$range i 0..n +$for i + +[[ +$var template = [[$if i==0 [[]] $else [[ +$range j 0..i-1 + template <$for j, [[typename A$j]]> +]]]] +$range j 0..i-1 +$var As = [[$for j, [[A$j]]]] +$var as = [[$for j, [[get<$j>(args)]]]] +$range k 1..n-i +$var eas = [[$for k, [[ExcessiveArg()]]]] +$var arg_list = [[$if (i==0) | (i==n) [[$as$eas]] $else [[$as, $eas]]]] +$template + static Result Perform(Impl* impl, const ::testing::tuple<$As>& args) { + return impl->template gmock_PerformImpl<$As>(args, $arg_list); + } + +]] +}; + +} // namespace internal + +// Various overloads for Invoke(). + +// WithArgs(an_action) creates an action that passes +// the selected arguments of the mock function to an_action and +// performs it. It serves as an adaptor between actions with +// different argument lists. C++ doesn't support default arguments for +// function templates, so we have to overload it. + +$range i 1..n +$for i [[ +$range j 1..i +template <$for j [[int k$j, ]]typename InnerAction> +inline internal::WithArgsAction +WithArgs(const InnerAction& action) { + return internal::WithArgsAction(action); +} + + +]] +// Creates an action that does actions a1, a2, ..., sequentially in +// each invocation. 
+$range i 2..n +$for i [[ +$range j 2..i +$var types = [[$for j, [[typename Action$j]]]] +$var Aas = [[$for j [[, Action$j a$j]]]] + +template +$range k 1..i-1 + +inline $for k [[internal::DoBothAction]] + +DoAll(Action1 a1$Aas) { +$if i==2 [[ + + return internal::DoBothAction(a1, a2); +]] $else [[ +$range j2 2..i + + return DoAll(a1, DoAll($for j2, [[a$j2]])); +]] + +} + +]] + +} // namespace testing + +// The ACTION* family of macros can be used in a namespace scope to +// define custom actions easily. The syntax: +// +// ACTION(name) { statements; } +// +// will define an action with the given name that executes the +// statements. The value returned by the statements will be used as +// the return value of the action. Inside the statements, you can +// refer to the K-th (0-based) argument of the mock function by +// 'argK', and refer to its type by 'argK_type'. For example: +// +// ACTION(IncrementArg1) { +// arg1_type temp = arg1; +// return ++(*temp); +// } +// +// allows you to write +// +// ...WillOnce(IncrementArg1()); +// +// You can also refer to the entire argument tuple and its type by +// 'args' and 'args_type', and refer to the mock function type and its +// return type by 'function_type' and 'return_type'. +// +// Note that you don't need to specify the types of the mock function +// arguments. However rest assured that your code is still type-safe: +// you'll get a compiler error if *arg1 doesn't support the ++ +// operator, or if the type of ++(*arg1) isn't compatible with the +// mock function's return type, for example. +// +// Sometimes you'll want to parameterize the action. For that you can use +// another macro: +// +// ACTION_P(name, param_name) { statements; } +// +// For example: +// +// ACTION_P(Add, n) { return arg0 + n; } +// +// will allow you to write: +// +// ...WillOnce(Add(5)); +// +// Note that you don't need to provide the type of the parameter +// either. 
If you need to reference the type of a parameter named +// 'foo', you can write 'foo_type'. For example, in the body of +// ACTION_P(Add, n) above, you can write 'n_type' to refer to the type +// of 'n'. +// +// We also provide ACTION_P2, ACTION_P3, ..., up to ACTION_P$n to support +// multi-parameter actions. +// +// For the purpose of typing, you can view +// +// ACTION_Pk(Foo, p1, ..., pk) { ... } +// +// as shorthand for +// +// template <typename p1_type, ..., typename pk_type> +// FooActionPk<p1_type, ..., pk_type> Foo(p1_type p1, ..., pk_type pk) { ... } +// +// In particular, you can provide the template type arguments +// explicitly when invoking Foo(), as in Foo<long, bool>(5, false); +// although usually you can rely on the compiler to infer the types +// for you automatically. You can assign the result of expression +// Foo(p1, ..., pk) to a variable of type FooActionPk<p1_type, ..., +// pk_type>. This can be useful when composing actions. +// +// You can also overload actions with different numbers of parameters: +// +// ACTION_P(Plus, a) { ... } +// ACTION_P2(Plus, a, b) { ... } +// +// While it's tempting to always use the ACTION* macros when defining +// a new action, you should also consider implementing ActionInterface +// or using MakePolymorphicAction() instead, especially if you need to +// use the action a lot. While these approaches require more work, +// they give you more control on the types of the mock function +// arguments and the action parameters, which in general leads to +// better compiler error messages that pay off in the long run. They +// also allow overloading actions based on parameter types (as opposed +// to just based on the number of parameters). +// +// CAVEAT: +// +// ACTION*() can only be used in a namespace scope. The reason is +// that C++ doesn't yet allow function-local types to be used to +// instantiate templates. The up-coming C++0x standard will fix this. +// Once that's done, we'll consider supporting using ACTION*() inside +// a function.
+// +// MORE INFORMATION: +// +// To learn more about using these macros, please search for 'ACTION' +// on https://github.com/google/googletest/blob/master/googlemock/docs/CookBook.md + +$range i 0..n +$range k 0..n-1 + +// An internal macro needed for implementing ACTION*(). +#define GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_\ + const args_type& args GTEST_ATTRIBUTE_UNUSED_ +$for k [[, \ + arg$k[[]]_type arg$k GTEST_ATTRIBUTE_UNUSED_]] + + +// Sometimes you want to give an action explicit template parameters +// that cannot be inferred from its value parameters. ACTION() and +// ACTION_P*() don't support that. ACTION_TEMPLATE() remedies that +// and can be viewed as an extension to ACTION() and ACTION_P*(). +// +// The syntax: +// +// ACTION_TEMPLATE(ActionName, +// HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m), +// AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; } +// +// defines an action template that takes m explicit template +// parameters and n value parameters. name_i is the name of the i-th +// template parameter, and kind_i specifies whether it's a typename, +// an integral constant, or a template. p_i is the name of the i-th +// value parameter. +// +// Example: +// +// // DuplicateArg<k, T>(output) converts the k-th argument of the mock +// // function to type T and copies it to *output. +// ACTION_TEMPLATE(DuplicateArg, +// HAS_2_TEMPLATE_PARAMS(int, k, typename, T), +// AND_1_VALUE_PARAMS(output)) { +// *output = T(::testing::get<k>(args)); +// } +// ... +// int n; +// EXPECT_CALL(mock, Foo(_, _)) +// .WillOnce(DuplicateArg<1, unsigned char>(&n)); +// +// To create an instance of an action template, write: +// +// ActionName<t1, ..., t_m>(v1, ..., v_n) +// +// where the ts are the template arguments and the vs are the value +// arguments. The value argument types are inferred by the compiler.
+// If you want to explicitly specify the value argument types, you can +// provide additional template arguments: +// +// ActionName<t1, ..., t_m, u1, ..., u_k>(v1, ..., v_n) +// +// where u_i is the desired type of v_i. +// +// ACTION_TEMPLATE and ACTION/ACTION_P* can be overloaded on the +// number of value parameters, but not on the number of template +// parameters. Without the restriction, the meaning of the following +// is unclear: +// +// OverloadedAction<int, bool>(x); +// +// Are we using a single-template-parameter action where 'bool' refers +// to the type of x, or are we using a two-template-parameter action +// where the compiler is asked to infer the type of x? +// +// Implementation notes: +// +// GMOCK_INTERNAL_*_HAS_m_TEMPLATE_PARAMS and +// GMOCK_INTERNAL_*_AND_n_VALUE_PARAMS are internal macros for +// implementing ACTION_TEMPLATE. The main trick we use is to create +// new macro invocations when expanding a macro. For example, we have +// +// #define ACTION_TEMPLATE(name, template_params, value_params) +// ... GMOCK_INTERNAL_DECL_##template_params ... +// +// which causes ACTION_TEMPLATE(..., HAS_1_TEMPLATE_PARAMS(typename, T), ...) +// to expand to +// +// ... GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(typename, T) ... +// +// Since GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS is a macro, the +// preprocessor will continue to expand it to +// +// ... typename T ... +// +// This technique conforms to the C++ standard and is portable. It +// allows us to implement action templates using O(N) code, where N is +// the maximum number of template/value parameters supported. Without +// using it, we'd have to devote O(N^2) amount of code to implement all +// combinations of m and n. + +// Declares the template parameters. + +$range j 1..n +$for j [[ +$range m 0..j-1 +#define GMOCK_INTERNAL_DECL_HAS_$j[[]] +_TEMPLATE_PARAMS($for m, [[kind$m, name$m]]) $for m, [[kind$m name$m]] + + +]] + +// Lists the template parameters.
+ +$for j [[ +$range m 0..j-1 +#define GMOCK_INTERNAL_LIST_HAS_$j[[]] +_TEMPLATE_PARAMS($for m, [[kind$m, name$m]]) $for m, [[name$m]] + + +]] + +// Declares the types of value parameters. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_DECL_TYPE_AND_$i[[]] +_VALUE_PARAMS($for j, [[p$j]]) $for j [[, typename p$j##_type]] + + +]] + +// Initializes the value parameters. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_INIT_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]])\ + ($for j, [[p$j##_type gmock_p$j]])$if i>0 [[ : ]]$for j, [[p$j(gmock_p$j)]] + + +]] + +// Declares the fields for storing the value parameters. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_DEFN_AND_$i[[]] +_VALUE_PARAMS($for j, [[p$j]]) $for j [[p$j##_type p$j; ]] + + +]] + +// Lists the value parameters. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_LIST_AND_$i[[]] +_VALUE_PARAMS($for j, [[p$j]]) $for j, [[p$j]] + + +]] + +// Lists the value parameter types. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_LIST_TYPE_AND_$i[[]] +_VALUE_PARAMS($for j, [[p$j]]) $for j [[, p$j##_type]] + + +]] + +// Declares the value parameters. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_DECL_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]]) [[]] +$for j, [[p$j##_type p$j]] + + +]] + +// The suffix of the class template implementing the action template. +$for i [[ + + +$range j 0..i-1 +#define GMOCK_INTERNAL_COUNT_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]]) [[]] +$if i==1 [[P]] $elif i>=2 [[P$i]] +]] + + +// The name of the class template implementing the action template. 
+#define GMOCK_ACTION_CLASS_(name, value_params)\ + GTEST_CONCAT_TOKEN_(name##Action, GMOCK_INTERNAL_COUNT_##value_params) + +$range k 0..n-1 + +#define ACTION_TEMPLATE(name, template_params, value_params)\ + template \ + class GMOCK_ACTION_CLASS_(name, value_params) {\ + public:\ + explicit GMOCK_ACTION_CLASS_(name, value_params)\ + GMOCK_INTERNAL_INIT_##value_params {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + explicit gmock_Impl GMOCK_INTERNAL_INIT_##value_params {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template <$for k, [[typename arg$k[[]]_type]]>\ + return_type gmock_PerformImpl(const args_type& args[[]] +$for k [[, arg$k[[]]_type arg$k]]) const;\ + GMOCK_INTERNAL_DEFN_##value_params\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(\ + new gmock_Impl(GMOCK_INTERNAL_LIST_##value_params));\ + }\ + GMOCK_INTERNAL_DEFN_##value_params\ + private:\ + GTEST_DISALLOW_ASSIGN_(GMOCK_ACTION_CLASS_(name, value_params));\ + };\ + template \ + inline GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params> name(\ + GMOCK_INTERNAL_DECL_##value_params) {\ + return GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params>(\ + GMOCK_INTERNAL_LIST_##value_params);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params>::gmock_Impl::\ + gmock_PerformImpl(\ + 
GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +$for i + +[[ +$var template = [[$if i==0 [[]] $else [[ +$range j 0..i-1 + + template <$for j, [[typename p$j##_type]]>\ +]]]] +$var class_name = [[name##Action[[$if i==0 [[]] $elif i==1 [[P]] + $else [[P$i]]]]]] +$range j 0..i-1 +$var ctor_param_list = [[$for j, [[p$j##_type gmock_p$j]]]] +$var param_types_and_names = [[$for j, [[p$j##_type p$j]]]] +$var inits = [[$if i==0 [[]] $else [[ : $for j, [[p$j(gmock_p$j)]]]]]] +$var param_field_decls = [[$for j +[[ + + p$j##_type p$j;\ +]]]] +$var param_field_decls2 = [[$for j +[[ + + p$j##_type p$j;\ +]]]] +$var params = [[$for j, [[p$j]]]] +$var param_types = [[$if i==0 [[]] $else [[<$for j, [[p$j##_type]]>]]]] +$var typename_arg_types = [[$for k, [[typename arg$k[[]]_type]]]] +$var arg_types_and_names = [[$for k, [[arg$k[[]]_type arg$k]]]] +$var macro_name = [[$if i==0 [[ACTION]] $elif i==1 [[ACTION_P]] + $else [[ACTION_P$i]]]] + +#define $macro_name(name$for j [[, p$j]])\$template + class $class_name {\ + public:\ + [[$if i==1 [[explicit ]]]]$class_name($ctor_param_list)$inits {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + [[$if i==1 [[explicit ]]]]gmock_Impl($ctor_param_list)$inits {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template <$typename_arg_types>\ + return_type gmock_PerformImpl(const args_type& args, [[]] +$arg_types_and_names) const;\$param_field_decls + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl($params));\ + }\$param_field_decls2 + private:\ + GTEST_DISALLOW_ASSIGN_($class_name);\ + };\$template + inline $class_name$param_types 
name($param_types_and_names) {\ + return $class_name$param_types($params);\ + }\$template + template \ + template <$typename_arg_types>\ + typename ::testing::internal::Function::Result\ + $class_name$param_types::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const +]] +$$ } // This meta comment fixes auto-indentation in Emacs. It won't +$$ // show up in the generated code. + + +namespace testing { + + +// The ACTION*() macros trigger warning C4100 (unreferenced formal +// parameter) in MSVC with -W4. Unfortunately they cannot be fixed in +// the macro definition, as the warnings are generated when the macro +// is expanded and macro expansion cannot contain #pragma. Therefore +// we suppress them here. +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable:4100) +#endif + +// Various overloads for InvokeArgument(). +// +// The InvokeArgument(a1, a2, ..., a_k) action invokes the N-th +// (0-based) argument, which must be a k-ary callable, of the mock +// function, with arguments a1, a2, ..., a_k. +// +// Notes: +// +// 1. The arguments are passed by value by default. If you need to +// pass an argument by reference, wrap it inside ByRef(). For +// example, +// +// InvokeArgument<1>(5, string("Hello"), ByRef(foo)) +// +// passes 5 and string("Hello") by value, and passes foo by +// reference. +// +// 2. If the callable takes an argument by reference but ByRef() is +// not used, it will receive the reference to a copy of the value, +// instead of the original value. For example, when the 0-th +// argument of the mock function takes a const string&, the action +// +// InvokeArgument<0>(string("Hello")) +// +// makes a copy of the temporary string("Hello") object and passes a +// reference of the copy, instead of the original temporary object, +// to the callable. This makes it easy for a user to define an +// InvokeArgument action from temporary values and have it performed +// later. 
+ +namespace internal { +namespace invoke_argument { + +// Appears in InvokeArgumentAdl's argument list to help avoid +// accidental calls to user functions of the same name. +struct AdlTag {}; + +// InvokeArgumentAdl - a helper for InvokeArgument. +// The basic overloads are provided here for generic functors. +// Overloads for other custom-callables are provided in the +// internal/custom/callback-actions.h header. + +$range i 0..n +$for i +[[ +$range j 1..i + +template +R InvokeArgumentAdl(AdlTag, F f[[$for j [[, A$j a$j]]]]) { + return f([[$for j, [[a$j]]]]); +} +]] + +} // namespace invoke_argument +} // namespace internal + +$range i 0..n +$for i [[ +$range j 0..i-1 + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]])) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::testing::get(args)$for j [[, p$j]]); +} + +]] + +// Various overloads for ReturnNew(). +// +// The ReturnNew(a1, a2, ..., a_k) action returns a pointer to a new +// instance of type T, constructed on the heap with constructor arguments +// a1, a2, ..., and a_k. The caller assumes ownership of the returned value. +$range i 0..n +$for i [[ +$range j 0..i-1 +$var ps = [[$for j, [[p$j]]]] + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_$i[[]]_VALUE_PARAMS($ps)) { + return new T($ps); +} + +]] + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +} // namespace testing + +// Include any custom callback actions added by the local installation. +// We must include this header at the end to make sure it can use the +// declarations from this file. 
+#include "gmock/internal/custom/gmock-generated-actions.h" + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ diff --git a/tools/external/googletest/googlemock/include/gmock/gmock-generated-function-mockers.h b/tools/external/googletest/googlemock/include/gmock/gmock-generated-function-mockers.h new file mode 100644 index 00000000..4fa5ca94 --- /dev/null +++ b/tools/external/googletest/googlemock/include/gmock/gmock-generated-function-mockers.h @@ -0,0 +1,1095 @@ +// This file was GENERATED by command: +// pump.py gmock-generated-function-mockers.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements function mockers of various arities. + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ + +#include "gmock/gmock-spec-builders.h" +#include "gmock/internal/gmock-internal-utils.h" + +#if GTEST_HAS_STD_FUNCTION_ +# include +#endif + +namespace testing { +namespace internal { + +template +class FunctionMockerBase; + +// Note: class FunctionMocker really belongs to the ::testing +// namespace. However if we define it in ::testing, MSVC will +// complain when classes in ::testing::internal declare it as a +// friend class template. To workaround this compiler bug, we define +// FunctionMocker in ::testing::internal and import it into ::testing. +template +class FunctionMocker; + +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F(); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With() { + return this->current_spec(); + } + + R Invoke() { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). 
+ return this->InvokeWith(ArgumentTuple()); + } +}; + +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F(A1); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With(const Matcher& m1) { + this->current_spec().SetMatchers(::testing::make_tuple(m1)); + return this->current_spec(); + } + + R Invoke(A1 a1) { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). + return this->InvokeWith(ArgumentTuple(a1)); + } +}; + +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F(A1, A2); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With(const Matcher& m1, const Matcher& m2) { + this->current_spec().SetMatchers(::testing::make_tuple(m1, m2)); + return this->current_spec(); + } + + R Invoke(A1 a1, A2 a2) { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). + return this->InvokeWith(ArgumentTuple(a1, a2)); + } +}; + +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F(A1, A2, A3); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With(const Matcher& m1, const Matcher& m2, + const Matcher& m3) { + this->current_spec().SetMatchers(::testing::make_tuple(m1, m2, m3)); + return this->current_spec(); + } + + R Invoke(A1 a1, A2 a2, A3 a3) { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). 
+ return this->InvokeWith(ArgumentTuple(a1, a2, a3)); + } +}; + +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F(A1, A2, A3, A4); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With(const Matcher& m1, const Matcher& m2, + const Matcher& m3, const Matcher& m4) { + this->current_spec().SetMatchers(::testing::make_tuple(m1, m2, m3, m4)); + return this->current_spec(); + } + + R Invoke(A1 a1, A2 a2, A3 a3, A4 a4) { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). + return this->InvokeWith(ArgumentTuple(a1, a2, a3, a4)); + } +}; + +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F(A1, A2, A3, A4, A5); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With(const Matcher& m1, const Matcher& m2, + const Matcher& m3, const Matcher& m4, const Matcher& m5) { + this->current_spec().SetMatchers(::testing::make_tuple(m1, m2, m3, m4, m5)); + return this->current_spec(); + } + + R Invoke(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). 
+ return this->InvokeWith(ArgumentTuple(a1, a2, a3, a4, a5)); + } +}; + +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F(A1, A2, A3, A4, A5, A6); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With(const Matcher& m1, const Matcher& m2, + const Matcher& m3, const Matcher& m4, const Matcher& m5, + const Matcher& m6) { + this->current_spec().SetMatchers(::testing::make_tuple(m1, m2, m3, m4, m5, + m6)); + return this->current_spec(); + } + + R Invoke(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6) { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). + return this->InvokeWith(ArgumentTuple(a1, a2, a3, a4, a5, a6)); + } +}; + +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F(A1, A2, A3, A4, A5, A6, A7); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With(const Matcher& m1, const Matcher& m2, + const Matcher& m3, const Matcher& m4, const Matcher& m5, + const Matcher& m6, const Matcher& m7) { + this->current_spec().SetMatchers(::testing::make_tuple(m1, m2, m3, m4, m5, + m6, m7)); + return this->current_spec(); + } + + R Invoke(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, A7 a7) { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). 
+ return this->InvokeWith(ArgumentTuple(a1, a2, a3, a4, a5, a6, a7)); + } +}; + +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F(A1, A2, A3, A4, A5, A6, A7, A8); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With(const Matcher& m1, const Matcher& m2, + const Matcher& m3, const Matcher& m4, const Matcher& m5, + const Matcher& m6, const Matcher& m7, const Matcher& m8) { + this->current_spec().SetMatchers(::testing::make_tuple(m1, m2, m3, m4, m5, + m6, m7, m8)); + return this->current_spec(); + } + + R Invoke(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, A7 a7, A8 a8) { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). + return this->InvokeWith(ArgumentTuple(a1, a2, a3, a4, a5, a6, a7, a8)); + } +}; + +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F(A1, A2, A3, A4, A5, A6, A7, A8, A9); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With(const Matcher& m1, const Matcher& m2, + const Matcher& m3, const Matcher& m4, const Matcher& m5, + const Matcher& m6, const Matcher& m7, const Matcher& m8, + const Matcher& m9) { + this->current_spec().SetMatchers(::testing::make_tuple(m1, m2, m3, m4, m5, + m6, m7, m8, m9)); + return this->current_spec(); + } + + R Invoke(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, A7 a7, A8 a8, A9 a9) { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). 
+ return this->InvokeWith(ArgumentTuple(a1, a2, a3, a4, a5, a6, a7, a8, a9)); + } +}; + +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F(A1, A2, A3, A4, A5, A6, A7, A8, A9, A10); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With(const Matcher& m1, const Matcher& m2, + const Matcher& m3, const Matcher& m4, const Matcher& m5, + const Matcher& m6, const Matcher& m7, const Matcher& m8, + const Matcher& m9, const Matcher& m10) { + this->current_spec().SetMatchers(::testing::make_tuple(m1, m2, m3, m4, m5, + m6, m7, m8, m9, m10)); + return this->current_spec(); + } + + R Invoke(A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, A7 a7, A8 a8, A9 a9, + A10 a10) { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). + return this->InvokeWith(ArgumentTuple(a1, a2, a3, a4, a5, a6, a7, a8, a9, + a10)); + } +}; + +} // namespace internal + +// The style guide prohibits "using" statements in a namespace scope +// inside a header file. However, the FunctionMocker class template +// is meant to be defined in the ::testing namespace. The following +// line is just a trick for working around a bug in MSVC 8.0, which +// cannot handle it if we define FunctionMocker in ::testing. +using internal::FunctionMocker; + +// GMOCK_RESULT_(tn, F) expands to the result type of function type F. +// We define this as a variadic macro in case F contains unprotected +// commas (the same reason that we use variadic macros in other places +// in this file). +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_RESULT_(tn, ...) \ + tn ::testing::internal::Function<__VA_ARGS__>::Result + +// The type of argument N of the given function type. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! 
+#define GMOCK_ARG_(tn, N, ...) \ + tn ::testing::internal::Function<__VA_ARGS__>::Argument##N + +// The matcher type for argument N of the given function type. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_MATCHER_(tn, N, ...) \ + const ::testing::Matcher& + +// The variable for mocking the given method. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_MOCKER_(arity, constness, Method) \ + GTEST_CONCAT_TOKEN_(gmock##constness##arity##_##Method##_, __LINE__) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD0_(tn, constness, ct, Method, ...) \ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + ) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value \ + == 0), \ + this_method_does_not_take_0_arguments); \ + GMOCK_MOCKER_(0, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(0, constness, Method).Invoke(); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method() constness { \ + GMOCK_MOCKER_(0, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(0, constness, Method).With(); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(0, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD1_(tn, constness, ct, Method, ...) 
\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value \ + == 1), \ + this_method_does_not_take_1_argument); \ + GMOCK_MOCKER_(1, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(1, constness, Method).Invoke(gmock_a1); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1) constness { \ + GMOCK_MOCKER_(1, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(1, constness, Method).With(gmock_a1); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(1, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD2_(tn, constness, ct, Method, ...) \ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_ARG_(tn, 2, __VA_ARGS__) gmock_a2) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value \ + == 2), \ + this_method_does_not_take_2_arguments); \ + GMOCK_MOCKER_(2, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(2, constness, Method).Invoke(gmock_a1, gmock_a2); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2) constness { \ + GMOCK_MOCKER_(2, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(2, constness, Method).With(gmock_a1, gmock_a2); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(2, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD3_(tn, constness, ct, Method, ...) 
\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_ARG_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value \ + == 3), \ + this_method_does_not_take_3_arguments); \ + GMOCK_MOCKER_(3, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(3, constness, Method).Invoke(gmock_a1, gmock_a2, \ + gmock_a3); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3) constness { \ + GMOCK_MOCKER_(3, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(3, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(3, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD4_(tn, constness, ct, Method, ...) 
\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_ARG_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value \ + == 4), \ + this_method_does_not_take_4_arguments); \ + GMOCK_MOCKER_(4, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(4, constness, Method).Invoke(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4) constness { \ + GMOCK_MOCKER_(4, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(4, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(4, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD5_(tn, constness, ct, Method, ...) 
\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_ARG_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_ARG_(tn, 5, __VA_ARGS__) gmock_a5) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value \ + == 5), \ + this_method_does_not_take_5_arguments); \ + GMOCK_MOCKER_(5, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(5, constness, Method).Invoke(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5) constness { \ + GMOCK_MOCKER_(5, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(5, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(5, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD6_(tn, constness, ct, Method, ...) 
\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_ARG_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_ARG_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value \ + == 6), \ + this_method_does_not_take_6_arguments); \ + GMOCK_MOCKER_(6, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(6, constness, Method).Invoke(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6) constness { \ + GMOCK_MOCKER_(6, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(6, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(6, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD7_(tn, constness, ct, Method, ...) 
\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_ARG_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_ARG_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value \ + == 7), \ + this_method_does_not_take_7_arguments); \ + GMOCK_MOCKER_(7, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(7, constness, Method).Invoke(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7) constness { \ + GMOCK_MOCKER_(7, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(7, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(7, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD8_(tn, constness, ct, Method, ...) 
\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_ARG_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_ARG_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7, \ + GMOCK_ARG_(tn, 8, __VA_ARGS__) gmock_a8) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value \ + == 8), \ + this_method_does_not_take_8_arguments); \ + GMOCK_MOCKER_(8, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(8, constness, Method).Invoke(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7, \ + GMOCK_MATCHER_(tn, 8, __VA_ARGS__) gmock_a8) constness { \ + GMOCK_MOCKER_(8, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(8, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(8, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD9_(tn, constness, ct, Method, ...) 
\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_ARG_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_ARG_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7, \ + GMOCK_ARG_(tn, 8, __VA_ARGS__) gmock_a8, \ + GMOCK_ARG_(tn, 9, __VA_ARGS__) gmock_a9) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value \ + == 9), \ + this_method_does_not_take_9_arguments); \ + GMOCK_MOCKER_(9, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(9, constness, Method).Invoke(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8, \ + gmock_a9); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7, \ + GMOCK_MATCHER_(tn, 8, __VA_ARGS__) gmock_a8, \ + GMOCK_MATCHER_(tn, 9, __VA_ARGS__) gmock_a9) constness { \ + GMOCK_MOCKER_(9, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(9, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8, \ + gmock_a9); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(9, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD10_(tn, constness, ct, Method, ...) 
\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_ARG_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_ARG_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7, \ + GMOCK_ARG_(tn, 8, __VA_ARGS__) gmock_a8, \ + GMOCK_ARG_(tn, 9, __VA_ARGS__) gmock_a9, \ + GMOCK_ARG_(tn, 10, __VA_ARGS__) gmock_a10) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value \ + == 10), \ + this_method_does_not_take_10_arguments); \ + GMOCK_MOCKER_(10, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(10, constness, Method).Invoke(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8, gmock_a9, \ + gmock_a10); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7, \ + GMOCK_MATCHER_(tn, 8, __VA_ARGS__) gmock_a8, \ + GMOCK_MATCHER_(tn, 9, __VA_ARGS__) gmock_a9, \ + GMOCK_MATCHER_(tn, 10, \ + __VA_ARGS__) gmock_a10) constness { \ + GMOCK_MOCKER_(10, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(10, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8, gmock_a9, \ + gmock_a10); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(10, constness, \ + Method) + +#define MOCK_METHOD0(m, ...) GMOCK_METHOD0_(, , , m, __VA_ARGS__) +#define MOCK_METHOD1(m, ...) GMOCK_METHOD1_(, , , m, __VA_ARGS__) +#define MOCK_METHOD2(m, ...) 
GMOCK_METHOD2_(, , , m, __VA_ARGS__) +#define MOCK_METHOD3(m, ...) GMOCK_METHOD3_(, , , m, __VA_ARGS__) +#define MOCK_METHOD4(m, ...) GMOCK_METHOD4_(, , , m, __VA_ARGS__) +#define MOCK_METHOD5(m, ...) GMOCK_METHOD5_(, , , m, __VA_ARGS__) +#define MOCK_METHOD6(m, ...) GMOCK_METHOD6_(, , , m, __VA_ARGS__) +#define MOCK_METHOD7(m, ...) GMOCK_METHOD7_(, , , m, __VA_ARGS__) +#define MOCK_METHOD8(m, ...) GMOCK_METHOD8_(, , , m, __VA_ARGS__) +#define MOCK_METHOD9(m, ...) GMOCK_METHOD9_(, , , m, __VA_ARGS__) +#define MOCK_METHOD10(m, ...) GMOCK_METHOD10_(, , , m, __VA_ARGS__) + +#define MOCK_CONST_METHOD0(m, ...) GMOCK_METHOD0_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD1(m, ...) GMOCK_METHOD1_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD2(m, ...) GMOCK_METHOD2_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD3(m, ...) GMOCK_METHOD3_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD4(m, ...) GMOCK_METHOD4_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD5(m, ...) GMOCK_METHOD5_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD6(m, ...) GMOCK_METHOD6_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD7(m, ...) GMOCK_METHOD7_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD8(m, ...) GMOCK_METHOD8_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD9(m, ...) GMOCK_METHOD9_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD10(m, ...) GMOCK_METHOD10_(, const, , m, __VA_ARGS__) + +#define MOCK_METHOD0_T(m, ...) GMOCK_METHOD0_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD1_T(m, ...) GMOCK_METHOD1_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD2_T(m, ...) GMOCK_METHOD2_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD3_T(m, ...) GMOCK_METHOD3_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD4_T(m, ...) GMOCK_METHOD4_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD5_T(m, ...) GMOCK_METHOD5_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD6_T(m, ...) 
GMOCK_METHOD6_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD7_T(m, ...) GMOCK_METHOD7_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD8_T(m, ...) GMOCK_METHOD8_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD9_T(m, ...) GMOCK_METHOD9_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD10_T(m, ...) GMOCK_METHOD10_(typename, , , m, __VA_ARGS__) + +#define MOCK_CONST_METHOD0_T(m, ...) \ + GMOCK_METHOD0_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD1_T(m, ...) \ + GMOCK_METHOD1_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD2_T(m, ...) \ + GMOCK_METHOD2_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD3_T(m, ...) \ + GMOCK_METHOD3_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD4_T(m, ...) \ + GMOCK_METHOD4_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD5_T(m, ...) \ + GMOCK_METHOD5_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD6_T(m, ...) \ + GMOCK_METHOD6_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD7_T(m, ...) \ + GMOCK_METHOD7_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD8_T(m, ...) \ + GMOCK_METHOD8_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD9_T(m, ...) \ + GMOCK_METHOD9_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD10_T(m, ...) \ + GMOCK_METHOD10_(typename, const, , m, __VA_ARGS__) + +#define MOCK_METHOD0_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD0_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD1_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD1_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD2_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD2_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD3_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD3_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD4_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD4_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD5_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD5_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD6_WITH_CALLTYPE(ct, m, ...) 
\ + GMOCK_METHOD6_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD7_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD7_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD8_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD8_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD9_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD9_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD10_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD10_(, , ct, m, __VA_ARGS__) + +#define MOCK_CONST_METHOD0_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD0_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD1_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD1_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD2_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD2_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD3_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD3_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD4_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD4_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD5_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD5_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD6_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD6_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD7_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD7_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD8_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD8_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD9_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD9_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD10_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD10_(, const, ct, m, __VA_ARGS__) + +#define MOCK_METHOD0_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD0_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD1_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD1_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD2_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD2_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD3_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD3_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD4_T_WITH_CALLTYPE(ct, m, ...) 
\ + GMOCK_METHOD4_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD5_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD5_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD6_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD6_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD7_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD7_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD8_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD8_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD9_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD9_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD10_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD10_(typename, , ct, m, __VA_ARGS__) + +#define MOCK_CONST_METHOD0_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD0_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD1_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD1_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD2_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD2_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD3_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD3_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD4_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD4_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD5_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD5_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD6_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD6_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD7_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD7_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD8_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD8_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD9_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD9_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD10_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD10_(typename, const, ct, m, __VA_ARGS__) + +// A MockFunction class has one mock method whose type is F. 
It is +// useful when you just want your test code to emit some messages and +// have Google Mock verify the right messages are sent (and perhaps at +// the right times). For example, if you are exercising code: +// +// Foo(1); +// Foo(2); +// Foo(3); +// +// and want to verify that Foo(1) and Foo(3) both invoke +// mock.Bar("a"), but Foo(2) doesn't invoke anything, you can write: +// +// TEST(FooTest, InvokesBarCorrectly) { +// MyMock mock; +// MockFunction check; +// { +// InSequence s; +// +// EXPECT_CALL(mock, Bar("a")); +// EXPECT_CALL(check, Call("1")); +// EXPECT_CALL(check, Call("2")); +// EXPECT_CALL(mock, Bar("a")); +// } +// Foo(1); +// check.Call("1"); +// Foo(2); +// check.Call("2"); +// Foo(3); +// } +// +// The expectation spec says that the first Bar("a") must happen +// before check point "1", the second Bar("a") must happen after check +// point "2", and nothing should happen between the two check +// points. The explicit check points make it easy to tell which +// Bar("a") is called by which call to Foo(). +// +// MockFunction can also be used to exercise code that accepts +// std::function callbacks. To do so, use AsStdFunction() method +// to create std::function proxy forwarding to original object's Call. 
+// Example: +// +// TEST(FooTest, RunsCallbackWithBarArgument) { +// MockFunction callback; +// EXPECT_CALL(callback, Call("bar")).WillOnce(Return(1)); +// Foo(callback.AsStdFunction()); +// } +template +class MockFunction; + +template +class MockFunction { + public: + MockFunction() {} + + MOCK_METHOD0_T(Call, R()); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this]() -> R { + return this->Call(); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + +template +class MockFunction { + public: + MockFunction() {} + + MOCK_METHOD1_T(Call, R(A0)); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this](A0 a0) -> R { + return this->Call(a0); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + +template +class MockFunction { + public: + MockFunction() {} + + MOCK_METHOD2_T(Call, R(A0, A1)); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this](A0 a0, A1 a1) -> R { + return this->Call(a0, a1); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + +template +class MockFunction { + public: + MockFunction() {} + + MOCK_METHOD3_T(Call, R(A0, A1, A2)); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this](A0 a0, A1 a1, A2 a2) -> R { + return this->Call(a0, a1, a2); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + +template +class MockFunction { + public: + MockFunction() {} + + MOCK_METHOD4_T(Call, R(A0, A1, A2, A3)); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this](A0 a0, A1 a1, A2 a2, A3 a3) -> R { + return this->Call(a0, a1, a2, a3); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + +template +class MockFunction { + public: + 
MockFunction() {} + + MOCK_METHOD5_T(Call, R(A0, A1, A2, A3, A4)); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this](A0 a0, A1 a1, A2 a2, A3 a3, A4 a4) -> R { + return this->Call(a0, a1, a2, a3, a4); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + +template +class MockFunction { + public: + MockFunction() {} + + MOCK_METHOD6_T(Call, R(A0, A1, A2, A3, A4, A5)); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this](A0 a0, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) -> R { + return this->Call(a0, a1, a2, a3, a4, a5); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + +template +class MockFunction { + public: + MockFunction() {} + + MOCK_METHOD7_T(Call, R(A0, A1, A2, A3, A4, A5, A6)); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this](A0 a0, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6) -> R { + return this->Call(a0, a1, a2, a3, a4, a5, a6); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + +template +class MockFunction { + public: + MockFunction() {} + + MOCK_METHOD8_T(Call, R(A0, A1, A2, A3, A4, A5, A6, A7)); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this](A0 a0, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, A7 a7) -> R { + return this->Call(a0, a1, a2, a3, a4, a5, a6, a7); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + +template +class MockFunction { + public: + MockFunction() {} + + MOCK_METHOD9_T(Call, R(A0, A1, A2, A3, A4, A5, A6, A7, A8)); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this](A0 a0, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, A7 a7, + A8 a8) -> R { + return this->Call(a0, a1, a2, a3, a4, a5, a6, a7, a8); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + 
GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + +template +class MockFunction { + public: + MockFunction() {} + + MOCK_METHOD10_T(Call, R(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9)); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this](A0 a0, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, A7 a7, + A8 a8, A9 a9) -> R { + return this->Call(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + +} // namespace testing + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ diff --git a/tools/external/googletest/googlemock/include/gmock/gmock-generated-function-mockers.h.pump b/tools/external/googletest/googlemock/include/gmock/gmock-generated-function-mockers.h.pump new file mode 100644 index 00000000..811502d0 --- /dev/null +++ b/tools/external/googletest/googlemock/include/gmock/gmock-generated-function-mockers.h.pump @@ -0,0 +1,291 @@ +$$ -*- mode: c++; -*- +$$ This is a Pump source file. Please use Pump to convert it to +$$ gmock-generated-function-mockers.h. +$$ +$var n = 10 $$ The maximum arity we support. +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements function mockers of various arities. + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ + +#include "gmock/gmock-spec-builders.h" +#include "gmock/internal/gmock-internal-utils.h" + +#if GTEST_HAS_STD_FUNCTION_ +# include +#endif + +namespace testing { +namespace internal { + +template +class FunctionMockerBase; + +// Note: class FunctionMocker really belongs to the ::testing +// namespace. However if we define it in ::testing, MSVC will +// complain when classes in ::testing::internal declare it as a +// friend class template. To workaround this compiler bug, we define +// FunctionMocker in ::testing::internal and import it into ::testing. 
+template +class FunctionMocker; + + +$range i 0..n +$for i [[ +$range j 1..i +$var typename_As = [[$for j [[, typename A$j]]]] +$var As = [[$for j, [[A$j]]]] +$var as = [[$for j, [[a$j]]]] +$var Aas = [[$for j, [[A$j a$j]]]] +$var ms = [[$for j, [[m$j]]]] +$var matchers = [[$for j, [[const Matcher& m$j]]]] +template +class FunctionMocker : public + internal::FunctionMockerBase { + public: + typedef R F($As); + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + MockSpec& With($matchers) { + +$if i >= 1 [[ + this->current_spec().SetMatchers(::testing::make_tuple($ms)); + +]] + return this->current_spec(); + } + + R Invoke($Aas) { + // Even though gcc and MSVC don't enforce it, 'this->' is required + // by the C++ standard [14.6.4] here, as the base class type is + // dependent on the template argument (and thus shouldn't be + // looked into when resolving InvokeWith). + return this->InvokeWith(ArgumentTuple($as)); + } +}; + + +]] +} // namespace internal + +// The style guide prohibits "using" statements in a namespace scope +// inside a header file. However, the FunctionMocker class template +// is meant to be defined in the ::testing namespace. The following +// line is just a trick for working around a bug in MSVC 8.0, which +// cannot handle it if we define FunctionMocker in ::testing. +using internal::FunctionMocker; + +// GMOCK_RESULT_(tn, F) expands to the result type of function type F. +// We define this as a variadic macro in case F contains unprotected +// commas (the same reason that we use variadic macros in other places +// in this file). +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_RESULT_(tn, ...) \ + tn ::testing::internal::Function<__VA_ARGS__>::Result + +// The type of argument N of the given function type. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_ARG_(tn, N, ...) 
\ + tn ::testing::internal::Function<__VA_ARGS__>::Argument##N + +// The matcher type for argument N of the given function type. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_MATCHER_(tn, N, ...) \ + const ::testing::Matcher& + +// The variable for mocking the given method. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_MOCKER_(arity, constness, Method) \ + GTEST_CONCAT_TOKEN_(gmock##constness##arity##_##Method##_, __LINE__) + + +$for i [[ +$range j 1..i +$var arg_as = [[$for j, \ + [[GMOCK_ARG_(tn, $j, __VA_ARGS__) gmock_a$j]]]] +$var as = [[$for j, [[gmock_a$j]]]] +$var matcher_as = [[$for j, \ + [[GMOCK_MATCHER_(tn, $j, __VA_ARGS__) gmock_a$j]]]] +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD$i[[]]_(tn, constness, ct, Method, ...) \ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + $arg_as) constness { \ + GTEST_COMPILE_ASSERT_((::testing::tuple_size< \ + tn ::testing::internal::Function<__VA_ARGS__>::ArgumentTuple>::value == $i), \ + this_method_does_not_take_$i[[]]_argument[[$if i != 1 [[s]]]]); \ + GMOCK_MOCKER_($i, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_($i, constness, Method).Invoke($as); \ + } \ + ::testing::MockSpec<__VA_ARGS__>& \ + gmock_##Method($matcher_as) constness { \ + GMOCK_MOCKER_($i, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_($i, constness, Method).With($as); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_($i, constness, Method) + + +]] +$for i [[ +#define MOCK_METHOD$i(m, ...) GMOCK_METHOD$i[[]]_(, , , m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_CONST_METHOD$i(m, ...) GMOCK_METHOD$i[[]]_(, const, , m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_METHOD$i[[]]_T(m, ...) GMOCK_METHOD$i[[]]_(typename, , , m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_CONST_METHOD$i[[]]_T(m, ...) 
\ + GMOCK_METHOD$i[[]]_(typename, const, , m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_METHOD$i[[]]_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD$i[[]]_(, , ct, m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_CONST_METHOD$i[[]]_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD$i[[]]_(, const, ct, m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_METHOD$i[[]]_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD$i[[]]_(typename, , ct, m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_CONST_METHOD$i[[]]_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD$i[[]]_(typename, const, ct, m, __VA_ARGS__) + +]] + +// A MockFunction class has one mock method whose type is F. It is +// useful when you just want your test code to emit some messages and +// have Google Mock verify the right messages are sent (and perhaps at +// the right times). For example, if you are exercising code: +// +// Foo(1); +// Foo(2); +// Foo(3); +// +// and want to verify that Foo(1) and Foo(3) both invoke +// mock.Bar("a"), but Foo(2) doesn't invoke anything, you can write: +// +// TEST(FooTest, InvokesBarCorrectly) { +// MyMock mock; +// MockFunction check; +// { +// InSequence s; +// +// EXPECT_CALL(mock, Bar("a")); +// EXPECT_CALL(check, Call("1")); +// EXPECT_CALL(check, Call("2")); +// EXPECT_CALL(mock, Bar("a")); +// } +// Foo(1); +// check.Call("1"); +// Foo(2); +// check.Call("2"); +// Foo(3); +// } +// +// The expectation spec says that the first Bar("a") must happen +// before check point "1", the second Bar("a") must happen after check +// point "2", and nothing should happen between the two check +// points. The explicit check points make it easy to tell which +// Bar("a") is called by which call to Foo(). +// +// MockFunction can also be used to exercise code that accepts +// std::function callbacks. To do so, use AsStdFunction() method +// to create std::function proxy forwarding to original object's Call. 
+// Example: +// +// TEST(FooTest, RunsCallbackWithBarArgument) { +// MockFunction callback; +// EXPECT_CALL(callback, Call("bar")).WillOnce(Return(1)); +// Foo(callback.AsStdFunction()); +// } +template +class MockFunction; + + +$for i [[ +$range j 0..i-1 +$var ArgTypes = [[$for j, [[A$j]]]] +$var ArgNames = [[$for j, [[a$j]]]] +$var ArgDecls = [[$for j, [[A$j a$j]]]] +template +class MockFunction { + public: + MockFunction() {} + + MOCK_METHOD$i[[]]_T(Call, R($ArgTypes)); + +#if GTEST_HAS_STD_FUNCTION_ + std::function AsStdFunction() { + return [this]($ArgDecls) -> R { + return this->Call($ArgNames); + }; + } +#endif // GTEST_HAS_STD_FUNCTION_ + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(MockFunction); +}; + + +]] +} // namespace testing + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ diff --git a/tools/external/googletest/googlemock/include/gmock/gmock-generated-matchers.h b/tools/external/googletest/googlemock/include/gmock/gmock-generated-matchers.h new file mode 100644 index 00000000..1655bcd3 --- /dev/null +++ b/tools/external/googletest/googlemock/include/gmock/gmock-generated-matchers.h @@ -0,0 +1,2191 @@ +// This file was GENERATED by command: +// pump.py gmock-generated-matchers.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used variadic matchers. + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ + +#include +#include +#include +#include +#include "gmock/gmock-matchers.h" + +namespace testing { +namespace internal { + +// The type of the i-th (0-based) field of Tuple. +#define GMOCK_FIELD_TYPE_(Tuple, i) \ + typename ::testing::tuple_element::type + +// TupleFields is for selecting fields from a +// tuple of type Tuple. It has two members: +// +// type: a tuple type whose i-th field is the ki-th field of Tuple. +// GetSelectedFields(t): returns fields k0, ..., and kn of t as a tuple. +// +// For example, in class TupleFields, 2, 0>, we have: +// +// type is tuple, and +// GetSelectedFields(make_tuple(true, 'a', 42)) is (42, true). + +template +class TupleFields; + +// This generic version is used when there are 10 selectors. 
+template +class TupleFields { + public: + typedef ::testing::tuple type; + static type GetSelectedFields(const Tuple& t) { + return type(get(t), get(t), get(t), get(t), get(t), + get(t), get(t), get(t), get(t), get(t)); + } +}; + +// The following specialization is used for 0 ~ 9 selectors. + +template +class TupleFields { + public: + typedef ::testing::tuple<> type; + static type GetSelectedFields(const Tuple& /* t */) { + return type(); + } +}; + +template +class TupleFields { + public: + typedef ::testing::tuple type; + static type GetSelectedFields(const Tuple& t) { + return type(get(t)); + } +}; + +template +class TupleFields { + public: + typedef ::testing::tuple type; + static type GetSelectedFields(const Tuple& t) { + return type(get(t), get(t)); + } +}; + +template +class TupleFields { + public: + typedef ::testing::tuple type; + static type GetSelectedFields(const Tuple& t) { + return type(get(t), get(t), get(t)); + } +}; + +template +class TupleFields { + public: + typedef ::testing::tuple type; + static type GetSelectedFields(const Tuple& t) { + return type(get(t), get(t), get(t), get(t)); + } +}; + +template +class TupleFields { + public: + typedef ::testing::tuple type; + static type GetSelectedFields(const Tuple& t) { + return type(get(t), get(t), get(t), get(t), get(t)); + } +}; + +template +class TupleFields { + public: + typedef ::testing::tuple type; + static type GetSelectedFields(const Tuple& t) { + return type(get(t), get(t), get(t), get(t), get(t), + get(t)); + } +}; + +template +class TupleFields { + public: + typedef ::testing::tuple type; + static type GetSelectedFields(const Tuple& t) { + return type(get(t), get(t), get(t), get(t), get(t), + get(t), get(t)); + } +}; + +template +class TupleFields { + public: + typedef ::testing::tuple type; + static type GetSelectedFields(const Tuple& t) { + return type(get(t), get(t), get(t), get(t), get(t), + get(t), get(t), get(t)); + } +}; + +template +class TupleFields { + public: + typedef 
::testing::tuple type; + static type GetSelectedFields(const Tuple& t) { + return type(get(t), get(t), get(t), get(t), get(t), + get(t), get(t), get(t), get(t)); + } +}; + +#undef GMOCK_FIELD_TYPE_ + +// Implements the Args() matcher. +template +class ArgsMatcherImpl : public MatcherInterface { + public: + // ArgsTuple may have top-level const or reference modifiers. + typedef GTEST_REMOVE_REFERENCE_AND_CONST_(ArgsTuple) RawArgsTuple; + typedef typename internal::TupleFields::type SelectedArgs; + typedef Matcher MonomorphicInnerMatcher; + + template + explicit ArgsMatcherImpl(const InnerMatcher& inner_matcher) + : inner_matcher_(SafeMatcherCast(inner_matcher)) {} + + virtual bool MatchAndExplain(ArgsTuple args, + MatchResultListener* listener) const { + const SelectedArgs& selected_args = GetSelectedArgs(args); + if (!listener->IsInterested()) + return inner_matcher_.Matches(selected_args); + + PrintIndices(listener->stream()); + *listener << "are " << PrintToString(selected_args); + + StringMatchResultListener inner_listener; + const bool match = inner_matcher_.MatchAndExplain(selected_args, + &inner_listener); + PrintIfNotEmpty(inner_listener.str(), listener->stream()); + return match; + } + + virtual void DescribeTo(::std::ostream* os) const { + *os << "are a tuple "; + PrintIndices(os); + inner_matcher_.DescribeTo(os); + } + + virtual void DescribeNegationTo(::std::ostream* os) const { + *os << "are a tuple "; + PrintIndices(os); + inner_matcher_.DescribeNegationTo(os); + } + + private: + static SelectedArgs GetSelectedArgs(ArgsTuple args) { + return TupleFields::GetSelectedFields(args); + } + + // Prints the indices of the selected fields. 
+ static void PrintIndices(::std::ostream* os) { + *os << "whose fields ("; + const int indices[10] = { k0, k1, k2, k3, k4, k5, k6, k7, k8, k9 }; + for (int i = 0; i < 10; i++) { + if (indices[i] < 0) + break; + + if (i >= 1) + *os << ", "; + + *os << "#" << indices[i]; + } + *os << ") "; + } + + const MonomorphicInnerMatcher inner_matcher_; + + GTEST_DISALLOW_ASSIGN_(ArgsMatcherImpl); +}; + +template +class ArgsMatcher { + public: + explicit ArgsMatcher(const InnerMatcher& inner_matcher) + : inner_matcher_(inner_matcher) {} + + template + operator Matcher() const { + return MakeMatcher(new ArgsMatcherImpl(inner_matcher_)); + } + + private: + const InnerMatcher inner_matcher_; + + GTEST_DISALLOW_ASSIGN_(ArgsMatcher); +}; + +// A set of metafunctions for computing the result type of AllOf. +// AllOf(m1, ..., mN) returns +// AllOfResultN::type. + +// Although AllOf isn't defined for one argument, AllOfResult1 is defined +// to simplify the implementation. +template +struct AllOfResult1 { + typedef M1 type; +}; + +template +struct AllOfResult2 { + typedef BothOfMatcher< + typename AllOfResult1::type, + typename AllOfResult1::type + > type; +}; + +template +struct AllOfResult3 { + typedef BothOfMatcher< + typename AllOfResult1::type, + typename AllOfResult2::type + > type; +}; + +template +struct AllOfResult4 { + typedef BothOfMatcher< + typename AllOfResult2::type, + typename AllOfResult2::type + > type; +}; + +template +struct AllOfResult5 { + typedef BothOfMatcher< + typename AllOfResult2::type, + typename AllOfResult3::type + > type; +}; + +template +struct AllOfResult6 { + typedef BothOfMatcher< + typename AllOfResult3::type, + typename AllOfResult3::type + > type; +}; + +template +struct AllOfResult7 { + typedef BothOfMatcher< + typename AllOfResult3::type, + typename AllOfResult4::type + > type; +}; + +template +struct AllOfResult8 { + typedef BothOfMatcher< + typename AllOfResult4::type, + typename AllOfResult4::type + > type; +}; + +template +struct 
AllOfResult9 { + typedef BothOfMatcher< + typename AllOfResult4::type, + typename AllOfResult5::type + > type; +}; + +template +struct AllOfResult10 { + typedef BothOfMatcher< + typename AllOfResult5::type, + typename AllOfResult5::type + > type; +}; + +// A set of metafunctions for computing the result type of AnyOf. +// AnyOf(m1, ..., mN) returns +// AnyOfResultN::type. + +// Although AnyOf isn't defined for one argument, AnyOfResult1 is defined +// to simplify the implementation. +template +struct AnyOfResult1 { + typedef M1 type; +}; + +template +struct AnyOfResult2 { + typedef EitherOfMatcher< + typename AnyOfResult1::type, + typename AnyOfResult1::type + > type; +}; + +template +struct AnyOfResult3 { + typedef EitherOfMatcher< + typename AnyOfResult1::type, + typename AnyOfResult2::type + > type; +}; + +template +struct AnyOfResult4 { + typedef EitherOfMatcher< + typename AnyOfResult2::type, + typename AnyOfResult2::type + > type; +}; + +template +struct AnyOfResult5 { + typedef EitherOfMatcher< + typename AnyOfResult2::type, + typename AnyOfResult3::type + > type; +}; + +template +struct AnyOfResult6 { + typedef EitherOfMatcher< + typename AnyOfResult3::type, + typename AnyOfResult3::type + > type; +}; + +template +struct AnyOfResult7 { + typedef EitherOfMatcher< + typename AnyOfResult3::type, + typename AnyOfResult4::type + > type; +}; + +template +struct AnyOfResult8 { + typedef EitherOfMatcher< + typename AnyOfResult4::type, + typename AnyOfResult4::type + > type; +}; + +template +struct AnyOfResult9 { + typedef EitherOfMatcher< + typename AnyOfResult4::type, + typename AnyOfResult5::type + > type; +}; + +template +struct AnyOfResult10 { + typedef EitherOfMatcher< + typename AnyOfResult5::type, + typename AnyOfResult5::type + > type; +}; + +} // namespace internal + +// Args(a_matcher) matches a tuple if the selected +// fields of it matches a_matcher. C++ doesn't support default +// arguments for function templates, so we have to overload it. 
+template +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + +template +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + +template +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + +template +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + +template +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + +template +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + +template +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + +template +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + +template +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + +template +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + +template +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + +// ElementsAre(e_1, e_2, ... e_n) matches an STL-style container with +// n elements, where the i-th element in the container must +// match the i-th argument in the list. Each argument of +// ElementsAre() can be either a value or a matcher. We support up to +// 10 arguments. +// +// The use of DecayArray in the implementation allows ElementsAre() +// to accept string literals, whose type is const char[N], but we +// want to treat them as const char*. +// +// NOTE: Since ElementsAre() cares about the order of the elements, it +// must not be used with containers whose elements's order is +// undefined (e.g. hash_map). 
+ +inline internal::ElementsAreMatcher< + ::testing::tuple<> > +ElementsAre() { + typedef ::testing::tuple<> Args; + return internal::ElementsAreMatcher(Args()); +} + +template +inline internal::ElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type> > +ElementsAre(const T1& e1) { + typedef ::testing::tuple< + typename internal::DecayArray::type> Args; + return internal::ElementsAreMatcher(Args(e1)); +} + +template +inline internal::ElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +ElementsAre(const T1& e1, const T2& e2) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::ElementsAreMatcher(Args(e1, e2)); +} + +template +inline internal::ElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +ElementsAre(const T1& e1, const T2& e2, const T3& e3) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::ElementsAreMatcher(Args(e1, e2, e3)); +} + +template +inline internal::ElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +ElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::ElementsAreMatcher(Args(e1, e2, e3, e4)); +} + +template +inline internal::ElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename 
internal::DecayArray::type, + typename internal::DecayArray::type> > +ElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::ElementsAreMatcher(Args(e1, e2, e3, e4, e5)); +} + +template +inline internal::ElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +ElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5, const T6& e6) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::ElementsAreMatcher(Args(e1, e2, e3, e4, e5, e6)); +} + +template +inline internal::ElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +ElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5, const T6& e6, const T7& e7) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return 
internal::ElementsAreMatcher(Args(e1, e2, e3, e4, e5, e6, e7)); +} + +template +inline internal::ElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +ElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5, const T6& e6, const T7& e7, const T8& e8) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::ElementsAreMatcher(Args(e1, e2, e3, e4, e5, e6, e7, + e8)); +} + +template +inline internal::ElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +ElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5, const T6& e6, const T7& e7, const T8& e8, const T9& e9) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return 
internal::ElementsAreMatcher(Args(e1, e2, e3, e4, e5, e6, e7, + e8, e9)); +} + +template +inline internal::ElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +ElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5, const T6& e6, const T7& e7, const T8& e8, const T9& e9, + const T10& e10) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::ElementsAreMatcher(Args(e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10)); +} + +// UnorderedElementsAre(e_1, e_2, ..., e_n) is an ElementsAre extension +// that matches n elements in any order. We support up to n=10 arguments. 
+ +inline internal::UnorderedElementsAreMatcher< + ::testing::tuple<> > +UnorderedElementsAre() { + typedef ::testing::tuple<> Args; + return internal::UnorderedElementsAreMatcher(Args()); +} + +template +inline internal::UnorderedElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type> > +UnorderedElementsAre(const T1& e1) { + typedef ::testing::tuple< + typename internal::DecayArray::type> Args; + return internal::UnorderedElementsAreMatcher(Args(e1)); +} + +template +inline internal::UnorderedElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +UnorderedElementsAre(const T1& e1, const T2& e2) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::UnorderedElementsAreMatcher(Args(e1, e2)); +} + +template +inline internal::UnorderedElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +UnorderedElementsAre(const T1& e1, const T2& e2, const T3& e3) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::UnorderedElementsAreMatcher(Args(e1, e2, e3)); +} + +template +inline internal::UnorderedElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +UnorderedElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::UnorderedElementsAreMatcher(Args(e1, e2, e3, e4)); +} + +template +inline internal::UnorderedElementsAreMatcher< + 
::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +UnorderedElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::UnorderedElementsAreMatcher(Args(e1, e2, e3, e4, e5)); +} + +template +inline internal::UnorderedElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +UnorderedElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5, const T6& e6) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::UnorderedElementsAreMatcher(Args(e1, e2, e3, e4, e5, + e6)); +} + +template +inline internal::UnorderedElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +UnorderedElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5, const T6& e6, const T7& e7) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename 
internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::UnorderedElementsAreMatcher(Args(e1, e2, e3, e4, e5, + e6, e7)); +} + +template +inline internal::UnorderedElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +UnorderedElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5, const T6& e6, const T7& e7, const T8& e8) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::UnorderedElementsAreMatcher(Args(e1, e2, e3, e4, e5, + e6, e7, e8)); +} + +template +inline internal::UnorderedElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +UnorderedElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5, const T6& e6, const T7& e7, const T8& e8, const T9& e9) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename 
internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::UnorderedElementsAreMatcher(Args(e1, e2, e3, e4, e5, + e6, e7, e8, e9)); +} + +template +inline internal::UnorderedElementsAreMatcher< + ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> > +UnorderedElementsAre(const T1& e1, const T2& e2, const T3& e3, const T4& e4, + const T5& e5, const T6& e6, const T7& e7, const T8& e8, const T9& e9, + const T10& e10) { + typedef ::testing::tuple< + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type, + typename internal::DecayArray::type> Args; + return internal::UnorderedElementsAreMatcher(Args(e1, e2, e3, e4, e5, + e6, e7, e8, e9, e10)); +} + +// AllOf(m1, m2, ..., mk) matches any value that matches all of the given +// sub-matchers. AllOf is called fully qualified to prevent ADL from firing. 
+ +template +inline typename internal::AllOfResult2::type +AllOf(M1 m1, M2 m2) { + return typename internal::AllOfResult2::type( + m1, + m2); +} + +template +inline typename internal::AllOfResult3::type +AllOf(M1 m1, M2 m2, M3 m3) { + return typename internal::AllOfResult3::type( + m1, + ::testing::AllOf(m2, m3)); +} + +template +inline typename internal::AllOfResult4::type +AllOf(M1 m1, M2 m2, M3 m3, M4 m4) { + return typename internal::AllOfResult4::type( + ::testing::AllOf(m1, m2), + ::testing::AllOf(m3, m4)); +} + +template +inline typename internal::AllOfResult5::type +AllOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5) { + return typename internal::AllOfResult5::type( + ::testing::AllOf(m1, m2), + ::testing::AllOf(m3, m4, m5)); +} + +template +inline typename internal::AllOfResult6::type +AllOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5, M6 m6) { + return typename internal::AllOfResult6::type( + ::testing::AllOf(m1, m2, m3), + ::testing::AllOf(m4, m5, m6)); +} + +template +inline typename internal::AllOfResult7::type +AllOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5, M6 m6, M7 m7) { + return typename internal::AllOfResult7::type( + ::testing::AllOf(m1, m2, m3), + ::testing::AllOf(m4, m5, m6, m7)); +} + +template +inline typename internal::AllOfResult8::type +AllOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5, M6 m6, M7 m7, M8 m8) { + return typename internal::AllOfResult8::type( + ::testing::AllOf(m1, m2, m3, m4), + ::testing::AllOf(m5, m6, m7, m8)); +} + +template +inline typename internal::AllOfResult9::type +AllOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5, M6 m6, M7 m7, M8 m8, M9 m9) { + return typename internal::AllOfResult9::type( + ::testing::AllOf(m1, m2, m3, m4), + ::testing::AllOf(m5, m6, m7, m8, m9)); +} + +template +inline typename internal::AllOfResult10::type +AllOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5, M6 m6, M7 m7, M8 m8, M9 m9, M10 m10) { + return typename internal::AllOfResult10::type( + ::testing::AllOf(m1, m2, m3, m4, m5), + ::testing::AllOf(m6, m7, m8, m9, m10)); +} + +// AnyOf(m1, m2, 
..., mk) matches any value that matches any of the given +// sub-matchers. AnyOf is called fully qualified to prevent ADL from firing. + +template +inline typename internal::AnyOfResult2::type +AnyOf(M1 m1, M2 m2) { + return typename internal::AnyOfResult2::type( + m1, + m2); +} + +template +inline typename internal::AnyOfResult3::type +AnyOf(M1 m1, M2 m2, M3 m3) { + return typename internal::AnyOfResult3::type( + m1, + ::testing::AnyOf(m2, m3)); +} + +template +inline typename internal::AnyOfResult4::type +AnyOf(M1 m1, M2 m2, M3 m3, M4 m4) { + return typename internal::AnyOfResult4::type( + ::testing::AnyOf(m1, m2), + ::testing::AnyOf(m3, m4)); +} + +template +inline typename internal::AnyOfResult5::type +AnyOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5) { + return typename internal::AnyOfResult5::type( + ::testing::AnyOf(m1, m2), + ::testing::AnyOf(m3, m4, m5)); +} + +template +inline typename internal::AnyOfResult6::type +AnyOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5, M6 m6) { + return typename internal::AnyOfResult6::type( + ::testing::AnyOf(m1, m2, m3), + ::testing::AnyOf(m4, m5, m6)); +} + +template +inline typename internal::AnyOfResult7::type +AnyOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5, M6 m6, M7 m7) { + return typename internal::AnyOfResult7::type( + ::testing::AnyOf(m1, m2, m3), + ::testing::AnyOf(m4, m5, m6, m7)); +} + +template +inline typename internal::AnyOfResult8::type +AnyOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5, M6 m6, M7 m7, M8 m8) { + return typename internal::AnyOfResult8::type( + ::testing::AnyOf(m1, m2, m3, m4), + ::testing::AnyOf(m5, m6, m7, m8)); +} + +template +inline typename internal::AnyOfResult9::type +AnyOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5, M6 m6, M7 m7, M8 m8, M9 m9) { + return typename internal::AnyOfResult9::type( + ::testing::AnyOf(m1, m2, m3, m4), + ::testing::AnyOf(m5, m6, m7, m8, m9)); +} + +template +inline typename internal::AnyOfResult10::type +AnyOf(M1 m1, M2 m2, M3 m3, M4 m4, M5 m5, M6 m6, M7 m7, M8 m8, M9 m9, M10 m10) { + return typename 
internal::AnyOfResult10::type( + ::testing::AnyOf(m1, m2, m3, m4, m5), + ::testing::AnyOf(m6, m7, m8, m9, m10)); +} + +} // namespace testing + + +// The MATCHER* family of macros can be used in a namespace scope to +// define custom matchers easily. +// +// Basic Usage +// =========== +// +// The syntax +// +// MATCHER(name, description_string) { statements; } +// +// defines a matcher with the given name that executes the statements, +// which must return a bool to indicate if the match succeeds. Inside +// the statements, you can refer to the value being matched by 'arg', +// and refer to its type by 'arg_type'. +// +// The description string documents what the matcher does, and is used +// to generate the failure message when the match fails. Since a +// MATCHER() is usually defined in a header file shared by multiple +// C++ source files, we require the description to be a C-string +// literal to avoid possible side effects. It can be empty, in which +// case we'll use the sequence of words in the matcher name as the +// description. +// +// For example: +// +// MATCHER(IsEven, "") { return (arg % 2) == 0; } +// +// allows you to write +// +// // Expects mock_foo.Bar(n) to be called where n is even. +// EXPECT_CALL(mock_foo, Bar(IsEven())); +// +// or, +// +// // Verifies that the value of some_expression is even. +// EXPECT_THAT(some_expression, IsEven()); +// +// If the above assertion fails, it will print something like: +// +// Value of: some_expression +// Expected: is even +// Actual: 7 +// +// where the description "is even" is automatically calculated from the +// matcher name IsEven. +// +// Argument Type +// ============= +// +// Note that the type of the value being matched (arg_type) is +// determined by the context in which you use the matcher and is +// supplied to you by the compiler, so you don't need to worry about +// declaring it (nor can you). This allows the matcher to be +// polymorphic. 
For example, IsEven() can be used to match any type +// where the value of "(arg % 2) == 0" can be implicitly converted to +// a bool. In the "Bar(IsEven())" example above, if method Bar() +// takes an int, 'arg_type' will be int; if it takes an unsigned long, +// 'arg_type' will be unsigned long; and so on. +// +// Parameterizing Matchers +// ======================= +// +// Sometimes you'll want to parameterize the matcher. For that you +// can use another macro: +// +// MATCHER_P(name, param_name, description_string) { statements; } +// +// For example: +// +// MATCHER_P(HasAbsoluteValue, value, "") { return abs(arg) == value; } +// +// will allow you to write: +// +// EXPECT_THAT(Blah("a"), HasAbsoluteValue(n)); +// +// which may lead to this message (assuming n is 10): +// +// Value of: Blah("a") +// Expected: has absolute value 10 +// Actual: -9 +// +// Note that both the matcher description and its parameter are +// printed, making the message human-friendly. +// +// In the matcher definition body, you can write 'foo_type' to +// reference the type of a parameter named 'foo'. For example, in the +// body of MATCHER_P(HasAbsoluteValue, value) above, you can write +// 'value_type' to refer to the type of 'value'. +// +// We also provide MATCHER_P2, MATCHER_P3, ..., up to MATCHER_P10 to +// support multi-parameter matchers. +// +// Describing Parameterized Matchers +// ================================= +// +// The last argument to MATCHER*() is a string-typed expression. The +// expression can reference all of the matcher's parameters and a +// special bool-typed variable named 'negation'. When 'negation' is +// false, the expression should evaluate to the matcher's description; +// otherwise it should evaluate to the description of the negation of +// the matcher. For example, +// +// using testing::PrintToString; +// +// MATCHER_P2(InClosedRange, low, hi, +// string(negation ?
"is not" : "is") + " in range [" + +// PrintToString(low) + ", " + PrintToString(hi) + "]") { +// return low <= arg && arg <= hi; +// } +// ... +// EXPECT_THAT(3, InClosedRange(4, 6)); +// EXPECT_THAT(3, Not(InClosedRange(2, 4))); +// +// would generate two failures that contain the text: +// +// Expected: is in range [4, 6] +// ... +// Expected: is not in range [2, 4] +// +// If you specify "" as the description, the failure message will +// contain the sequence of words in the matcher name followed by the +// parameter values printed as a tuple. For example, +// +// MATCHER_P2(InClosedRange, low, hi, "") { ... } +// ... +// EXPECT_THAT(3, InClosedRange(4, 6)); +// EXPECT_THAT(3, Not(InClosedRange(2, 4))); +// +// would generate two failures that contain the text: +// +// Expected: in closed range (4, 6) +// ... +// Expected: not (in closed range (2, 4)) +// +// Types of Matcher Parameters +// =========================== +// +// For the purpose of typing, you can view +// +// MATCHER_Pk(Foo, p1, ..., pk, description_string) { ... } +// +// as shorthand for +// +// template <typename p1_type, ..., typename pk_type> +// FooMatcherPk<p1_type, ..., pk_type> +// Foo(p1_type p1, ..., pk_type pk) { ... } +// +// When you write Foo(v1, ..., vk), the compiler infers the types of +// the parameters v1, ..., and vk for you. If you are not happy with +// the result of the type inference, you can specify the types by +// explicitly instantiating the template, as in Foo<long, bool>(5, +// false). As said earlier, you don't get to (or need to) specify +// 'arg_type' as that's determined by the context in which the matcher +// is used. You can assign the result of expression Foo(p1, ..., pk) +// to a variable of type FooMatcherPk<p1_type, ..., pk_type>. This +// can be useful when composing matchers. +// +// While you can instantiate a matcher template with reference types, +// passing the parameters by pointer usually makes your code more +// readable.
If, however, you still want to pass a parameter by +// reference, be aware that in the failure message generated by the +// matcher you will see the value of the referenced object but not its +// address. +// +// Explaining Match Results +// ======================== +// +// Sometimes the matcher description alone isn't enough to explain why +// the match has failed or succeeded. For example, when expecting a +// long string, it can be very helpful to also print the diff between +// the expected string and the actual one. To achieve that, you can +// optionally stream additional information to a special variable +// named result_listener, whose type is a pointer to class +// MatchResultListener: +// +// MATCHER_P(EqualsLongString, str, "") { +// if (arg == str) return true; +// +// *result_listener << "the difference: " +// << DiffStrings(str, arg); +// return false; +// } +// +// Overloading Matchers +// ==================== +// +// You can overload matchers with different numbers of parameters: +// +// MATCHER_P(Blah, a, description_string1) { ... } +// MATCHER_P2(Blah, a, b, description_string2) { ... } +// +// Caveats +// ======= +// +// When defining a new matcher, you should also consider implementing +// MatcherInterface or using MakePolymorphicMatcher(). These +// approaches require more work than the MATCHER* macros, but also +// give you more control over the types of the value being matched and +// the matcher parameters, which may lead to better compiler error +// messages when the matcher is used incorrectly. They also allow +// overloading matchers based on parameter types (as opposed to just +// based on the number of parameters). +// +// MATCHER*() can only be used in a namespace scope. The reason is +// that C++ doesn't yet allow function-local types to be used to +// instantiate templates. The upcoming C++0x standard will fix this. +// Once that's done, we'll consider supporting using MATCHER*() inside +// a function.
+// +// More Information +// ================ +// +// To learn more about using these macros, please search for 'MATCHER' +// on https://github.com/google/googletest/blob/master/googlemock/docs/ +// CookBook.md + +#define MATCHER(name, description)\ + class name##Matcher {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + gmock_Impl()\ + {}\ + virtual bool MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple<>()));\ + }\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl());\ + }\ + name##Matcher() {\ + }\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##Matcher);\ + };\ + inline name##Matcher name() {\ + return name##Matcher();\ + }\ + template \ + bool name##Matcher::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P(name, p0, description)\ + template \ + class name##MatcherP {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + explicit gmock_Impl(p0##_type gmock_p0)\ + : p0(gmock_p0) {}\ + virtual bool MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) 
const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type p0;\ + private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple(p0)));\ + }\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0));\ + }\ + explicit name##MatcherP(p0##_type gmock_p0) : p0(gmock_p0) {\ + }\ + p0##_type p0;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##MatcherP);\ + };\ + template \ + inline name##MatcherP name(p0##_type p0) {\ + return name##MatcherP(p0);\ + }\ + template \ + template \ + bool name##MatcherP::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P2(name, p0, p1, description)\ + template \ + class name##MatcherP2 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1)\ + : p0(gmock_p0), p1(gmock_p1) {}\ + virtual bool MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type p0;\ + p1##_type p1;\ + private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ 
+ return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple(p0, p1)));\ + }\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1));\ + }\ + name##MatcherP2(p0##_type gmock_p0, p1##_type gmock_p1) : p0(gmock_p0), \ + p1(gmock_p1) {\ + }\ + p0##_type p0;\ + p1##_type p1;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##MatcherP2);\ + };\ + template \ + inline name##MatcherP2 name(p0##_type p0, \ + p1##_type p1) {\ + return name##MatcherP2(p0, p1);\ + }\ + template \ + template \ + bool name##MatcherP2::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P3(name, p0, p1, p2, description)\ + template \ + class name##MatcherP3 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2)\ + : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2) {}\ + virtual bool MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple(p0, p1, \ + p2)));\ + }\ + 
GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2));\ + }\ + name##MatcherP3(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2) {\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##MatcherP3);\ + };\ + template \ + inline name##MatcherP3 name(p0##_type p0, \ + p1##_type p1, p2##_type p2) {\ + return name##MatcherP3(p0, p1, p2);\ + }\ + template \ + template \ + bool name##MatcherP3::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P4(name, p0, p1, p2, p3, description)\ + template \ + class name##MatcherP4 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3)\ + : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), p3(gmock_p3) {}\ + virtual bool MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple(p0, p1, p2, p3)));\ + }\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator ::testing::Matcher() const {\ + return 
::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3));\ + }\ + name##MatcherP4(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3) : p0(gmock_p0), p1(gmock_p1), \ + p2(gmock_p2), p3(gmock_p3) {\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##MatcherP4);\ + };\ + template \ + inline name##MatcherP4 name(p0##_type p0, p1##_type p1, p2##_type p2, \ + p3##_type p3) {\ + return name##MatcherP4(p0, \ + p1, p2, p3);\ + }\ + template \ + template \ + bool name##MatcherP4::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P5(name, p0, p1, p2, p3, p4, description)\ + template \ + class name##MatcherP5 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4)\ + : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), p3(gmock_p3), \ + p4(gmock_p4) {}\ + virtual bool MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple(p0, p1, p2, p3, p4)));\ + }\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator 
::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4));\ + }\ + name##MatcherP5(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, \ + p4##_type gmock_p4) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4) {\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##MatcherP5);\ + };\ + template \ + inline name##MatcherP5 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4) {\ + return name##MatcherP5(p0, p1, p2, p3, p4);\ + }\ + template \ + template \ + bool name##MatcherP5::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P6(name, p0, p1, p2, p3, p4, p5, description)\ + template \ + class name##MatcherP6 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5)\ + : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), p3(gmock_p3), \ + p4(gmock_p4), p5(gmock_p5) {}\ + virtual bool MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + 
::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple(p0, p1, p2, p3, p4, p5)));\ + }\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4, p5));\ + }\ + name##MatcherP6(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5) {\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##MatcherP6);\ + };\ + template \ + inline name##MatcherP6 name(p0##_type p0, p1##_type p1, p2##_type p2, \ + p3##_type p3, p4##_type p4, p5##_type p5) {\ + return name##MatcherP6(p0, p1, p2, p3, p4, p5);\ + }\ + template \ + template \ + bool name##MatcherP6::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P7(name, p0, p1, p2, p3, p4, p5, p6, description)\ + template \ + class name##MatcherP7 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6)\ + : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), p3(gmock_p3), \ + p4(gmock_p4), p5(gmock_p5), p6(gmock_p6) {}\ + virtual bool MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + 
private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple(p0, p1, p2, p3, p4, p5, \ + p6)));\ + }\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4, p5, p6));\ + }\ + name##MatcherP7(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6) : p0(gmock_p0), p1(gmock_p1), \ + p2(gmock_p2), p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), \ + p6(gmock_p6) {\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##MatcherP7);\ + };\ + template \ + inline name##MatcherP7 name(p0##_type p0, p1##_type p1, \ + p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ + p6##_type p6) {\ + return name##MatcherP7(p0, p1, p2, p3, p4, p5, p6);\ + }\ + template \ + template \ + bool name##MatcherP7::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P8(name, p0, p1, p2, p3, p4, p5, p6, p7, description)\ + template \ + class name##MatcherP8 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7)\ + : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), p3(gmock_p3), \ + p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), p7(gmock_p7) {}\ + virtual bool 
MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple(p0, p1, p2, \ + p3, p4, p5, p6, p7)));\ + }\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4, p5, p6, p7));\ + }\ + name##MatcherP8(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, \ + p7##_type gmock_p7) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), \ + p7(gmock_p7) {\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##MatcherP8);\ + };\ + template \ + inline name##MatcherP8 name(p0##_type p0, \ + p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ + p6##_type p6, p7##_type p7) {\ + return name##MatcherP8(p0, p1, p2, p3, p4, p5, \ + p6, p7);\ + }\ + template \ + template \ + bool name##MatcherP8::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + 
const + +#define MATCHER_P9(name, p0, p1, p2, p3, p4, p5, p6, p7, p8, description)\ + template \ + class name##MatcherP9 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8)\ + : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), p3(gmock_p3), \ + p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), p7(gmock_p7), \ + p8(gmock_p8) {}\ + virtual bool MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple(p0, p1, p2, p3, p4, p5, p6, p7, p8)));\ + }\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4, p5, p6, p7, p8));\ + }\ + name##MatcherP9(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8) : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), \ + p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), p7(gmock_p7), \ + 
p8(gmock_p8) {\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##MatcherP9);\ + };\ + template \ + inline name##MatcherP9 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, \ + p8##_type p8) {\ + return name##MatcherP9(p0, p1, p2, \ + p3, p4, p5, p6, p7, p8);\ + }\ + template \ + template \ + bool name##MatcherP9::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P10(name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, description)\ + template \ + class name##MatcherP10 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8, \ + p9##_type gmock_p9)\ + : p0(gmock_p0), p1(gmock_p1), p2(gmock_p2), p3(gmock_p3), \ + p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), p7(gmock_p7), \ + p8(gmock_p8), p9(gmock_p9) {}\ + virtual bool MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + p9##_type p9;\ + private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + 
return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)));\ + }\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9));\ + }\ + name##MatcherP10(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8, p9##_type gmock_p9) : p0(gmock_p0), p1(gmock_p1), \ + p2(gmock_p2), p3(gmock_p3), p4(gmock_p4), p5(gmock_p5), p6(gmock_p6), \ + p7(gmock_p7), p8(gmock_p8), p9(gmock_p9) {\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + p9##_type p9;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##MatcherP10);\ + };\ + template \ + inline name##MatcherP10 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8, \ + p9##_type p9) {\ + return name##MatcherP10(p0, \ + p1, p2, p3, p4, p5, p6, p7, p8, p9);\ + }\ + template \ + template \ + bool name##MatcherP10::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ diff --git a/tools/external/googletest/googlemock/include/gmock/gmock-generated-matchers.h.pump b/tools/external/googletest/googlemock/include/gmock/gmock-generated-matchers.h.pump new file mode 100644 index 00000000..25d2da99 --- /dev/null +++ b/tools/external/googletest/googlemock/include/gmock/gmock-generated-matchers.h.pump @@ -0,0 +1,673 @@ +$$ -*- mode: c++; -*- +$$ This is a Pump source file. 
Please use Pump to convert it to +$$ gmock-generated-matchers.h. +$$ +$var n = 10 $$ The maximum arity we support. +$$ }} This line fixes auto-indentation of the following code in Emacs. +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used variadic matchers.
+ +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ + +#include +#include +#include +#include +#include "gmock/gmock-matchers.h" + +namespace testing { +namespace internal { + +$range i 0..n-1 + +// The type of the i-th (0-based) field of Tuple. +#define GMOCK_FIELD_TYPE_(Tuple, i) \ + typename ::testing::tuple_element::type + +// TupleFields is for selecting fields from a +// tuple of type Tuple. It has two members: +// +// type: a tuple type whose i-th field is the ki-th field of Tuple. +// GetSelectedFields(t): returns fields k0, ..., and kn of t as a tuple. +// +// For example, in class TupleFields, 2, 0>, we have: +// +// type is tuple, and +// GetSelectedFields(make_tuple(true, 'a', 42)) is (42, true). + +template +class TupleFields; + +// This generic version is used when there are $n selectors. +template +class TupleFields { + public: + typedef ::testing::tuple<$for i, [[GMOCK_FIELD_TYPE_(Tuple, k$i)]]> type; + static type GetSelectedFields(const Tuple& t) { + return type($for i, [[get(t)]]); + } +}; + +// The following specialization is used for 0 ~ $(n-1) selectors. + +$for i [[ +$$ }}} +$range j 0..i-1 +$range k 0..n-1 + +template +class TupleFields { + public: + typedef ::testing::tuple<$for j, [[GMOCK_FIELD_TYPE_(Tuple, k$j)]]> type; + static type GetSelectedFields(const Tuple& $if i==0 [[/* t */]] $else [[t]]) { + return type($for j, [[get(t)]]); + } +}; + +]] + +#undef GMOCK_FIELD_TYPE_ + +// Implements the Args() matcher. + +$var ks = [[$for i, [[k$i]]]] +template +class ArgsMatcherImpl : public MatcherInterface { + public: + // ArgsTuple may have top-level const or reference modifiers. 
+ typedef GTEST_REMOVE_REFERENCE_AND_CONST_(ArgsTuple) RawArgsTuple; + typedef typename internal::TupleFields::type SelectedArgs; + typedef Matcher MonomorphicInnerMatcher; + + template + explicit ArgsMatcherImpl(const InnerMatcher& inner_matcher) + : inner_matcher_(SafeMatcherCast(inner_matcher)) {} + + virtual bool MatchAndExplain(ArgsTuple args, + MatchResultListener* listener) const { + const SelectedArgs& selected_args = GetSelectedArgs(args); + if (!listener->IsInterested()) + return inner_matcher_.Matches(selected_args); + + PrintIndices(listener->stream()); + *listener << "are " << PrintToString(selected_args); + + StringMatchResultListener inner_listener; + const bool match = inner_matcher_.MatchAndExplain(selected_args, + &inner_listener); + PrintIfNotEmpty(inner_listener.str(), listener->stream()); + return match; + } + + virtual void DescribeTo(::std::ostream* os) const { + *os << "are a tuple "; + PrintIndices(os); + inner_matcher_.DescribeTo(os); + } + + virtual void DescribeNegationTo(::std::ostream* os) const { + *os << "are a tuple "; + PrintIndices(os); + inner_matcher_.DescribeNegationTo(os); + } + + private: + static SelectedArgs GetSelectedArgs(ArgsTuple args) { + return TupleFields::GetSelectedFields(args); + } + + // Prints the indices of the selected fields. 
+ static void PrintIndices(::std::ostream* os) { + *os << "whose fields ("; + const int indices[$n] = { $ks }; + for (int i = 0; i < $n; i++) { + if (indices[i] < 0) + break; + + if (i >= 1) + *os << ", "; + + *os << "#" << indices[i]; + } + *os << ") "; + } + + const MonomorphicInnerMatcher inner_matcher_; + + GTEST_DISALLOW_ASSIGN_(ArgsMatcherImpl); +}; + +template +class ArgsMatcher { + public: + explicit ArgsMatcher(const InnerMatcher& inner_matcher) + : inner_matcher_(inner_matcher) {} + + template + operator Matcher() const { + return MakeMatcher(new ArgsMatcherImpl(inner_matcher_)); + } + + private: + const InnerMatcher inner_matcher_; + + GTEST_DISALLOW_ASSIGN_(ArgsMatcher); +}; + +// A set of metafunctions for computing the result type of AllOf. +// AllOf(m1, ..., mN) returns +// AllOfResultN::type. + +// Although AllOf isn't defined for one argument, AllOfResult1 is defined +// to simplify the implementation. +template +struct AllOfResult1 { + typedef M1 type; +}; + +$range i 1..n + +$range i 2..n +$for i [[ +$range j 2..i +$var m = i/2 +$range k 1..m +$range t m+1..i + +template +struct AllOfResult$i { + typedef BothOfMatcher< + typename AllOfResult$m<$for k, [[M$k]]>::type, + typename AllOfResult$(i-m)<$for t, [[M$t]]>::type + > type; +}; + +]] + +// A set of metafunctions for computing the result type of AnyOf. +// AnyOf(m1, ..., mN) returns +// AnyOfResultN::type. + +// Although AnyOf isn't defined for one argument, AnyOfResult1 is defined +// to simplify the implementation. +template +struct AnyOfResult1 { + typedef M1 type; +}; + +$range i 1..n + +$range i 2..n +$for i [[ +$range j 2..i +$var m = i/2 +$range k 1..m +$range t m+1..i + +template +struct AnyOfResult$i { + typedef EitherOfMatcher< + typename AnyOfResult$m<$for k, [[M$k]]>::type, + typename AnyOfResult$(i-m)<$for t, [[M$t]]>::type + > type; +}; + +]] + +} // namespace internal + +// Args(a_matcher) matches a tuple if the selected +// fields of it matches a_matcher. 
C++ doesn't support default +// arguments for function templates, so we have to overload it. + +$range i 0..n +$for i [[ +$range j 1..i +template <$for j [[int k$j, ]]typename InnerMatcher> +inline internal::ArgsMatcher +Args(const InnerMatcher& matcher) { + return internal::ArgsMatcher(matcher); +} + + +]] +// ElementsAre(e_1, e_2, ... e_n) matches an STL-style container with +// n elements, where the i-th element in the container must +// match the i-th argument in the list. Each argument of +// ElementsAre() can be either a value or a matcher. We support up to +// $n arguments. +// +// The use of DecayArray in the implementation allows ElementsAre() +// to accept string literals, whose type is const char[N], but we +// want to treat them as const char*. +// +// NOTE: Since ElementsAre() cares about the order of the elements, it +// must not be used with containers whose elements's order is +// undefined (e.g. hash_map). + +$range i 0..n +$for i [[ + +$range j 1..i + +$if i>0 [[ + +template <$for j, [[typename T$j]]> +]] + +inline internal::ElementsAreMatcher< + ::testing::tuple< +$for j, [[ + + typename internal::DecayArray::type]]> > +ElementsAre($for j, [[const T$j& e$j]]) { + typedef ::testing::tuple< +$for j, [[ + + typename internal::DecayArray::type]]> Args; + return internal::ElementsAreMatcher(Args($for j, [[e$j]])); +} + +]] + +// UnorderedElementsAre(e_1, e_2, ..., e_n) is an ElementsAre extension +// that matches n elements in any order. We support up to n=$n arguments. 
+ +$range i 0..n +$for i [[ + +$range j 1..i + +$if i>0 [[ + +template <$for j, [[typename T$j]]> +]] + +inline internal::UnorderedElementsAreMatcher< + ::testing::tuple< +$for j, [[ + + typename internal::DecayArray::type]]> > +UnorderedElementsAre($for j, [[const T$j& e$j]]) { + typedef ::testing::tuple< +$for j, [[ + + typename internal::DecayArray::type]]> Args; + return internal::UnorderedElementsAreMatcher(Args($for j, [[e$j]])); +} + +]] + +// AllOf(m1, m2, ..., mk) matches any value that matches all of the given +// sub-matchers. AllOf is called fully qualified to prevent ADL from firing. + +$range i 2..n +$for i [[ +$range j 1..i +$var m = i/2 +$range k 1..m +$range t m+1..i + +template <$for j, [[typename M$j]]> +inline typename internal::AllOfResult$i<$for j, [[M$j]]>::type +AllOf($for j, [[M$j m$j]]) { + return typename internal::AllOfResult$i<$for j, [[M$j]]>::type( + $if m == 1 [[m1]] $else [[::testing::AllOf($for k, [[m$k]])]], + $if m+1 == i [[m$i]] $else [[::testing::AllOf($for t, [[m$t]])]]); +} + +]] + +// AnyOf(m1, m2, ..., mk) matches any value that matches any of the given +// sub-matchers. AnyOf is called fully qualified to prevent ADL from firing. + +$range i 2..n +$for i [[ +$range j 1..i +$var m = i/2 +$range k 1..m +$range t m+1..i + +template <$for j, [[typename M$j]]> +inline typename internal::AnyOfResult$i<$for j, [[M$j]]>::type +AnyOf($for j, [[M$j m$j]]) { + return typename internal::AnyOfResult$i<$for j, [[M$j]]>::type( + $if m == 1 [[m1]] $else [[::testing::AnyOf($for k, [[m$k]])]], + $if m+1 == i [[m$i]] $else [[::testing::AnyOf($for t, [[m$t]])]]); +} + +]] + +} // namespace testing +$$ } // This Pump meta comment fixes auto-indentation in Emacs. It will not +$$ // show up in the generated code. + + +// The MATCHER* family of macros can be used in a namespace scope to +// define custom matchers easily. 
+// +// Basic Usage +// =========== +// +// The syntax +// +// MATCHER(name, description_string) { statements; } +// +// defines a matcher with the given name that executes the statements, +// which must return a bool to indicate if the match succeeds. Inside +// the statements, you can refer to the value being matched by 'arg', +// and refer to its type by 'arg_type'. +// +// The description string documents what the matcher does, and is used +// to generate the failure message when the match fails. Since a +// MATCHER() is usually defined in a header file shared by multiple +// C++ source files, we require the description to be a C-string +// literal to avoid possible side effects. It can be empty, in which +// case we'll use the sequence of words in the matcher name as the +// description. +// +// For example: +// +// MATCHER(IsEven, "") { return (arg % 2) == 0; } +// +// allows you to write +// +// // Expects mock_foo.Bar(n) to be called where n is even. +// EXPECT_CALL(mock_foo, Bar(IsEven())); +// +// or, +// +// // Verifies that the value of some_expression is even. +// EXPECT_THAT(some_expression, IsEven()); +// +// If the above assertion fails, it will print something like: +// +// Value of: some_expression +// Expected: is even +// Actual: 7 +// +// where the description "is even" is automatically calculated from the +// matcher name IsEven. +// +// Argument Type +// ============= +// +// Note that the type of the value being matched (arg_type) is +// determined by the context in which you use the matcher and is +// supplied to you by the compiler, so you don't need to worry about +// declaring it (nor can you). This allows the matcher to be +// polymorphic. For example, IsEven() can be used to match any type +// where the value of "(arg % 2) == 0" can be implicitly converted to +// a bool. 
In the "Bar(IsEven())" example above, if method Bar() +// takes an int, 'arg_type' will be int; if it takes an unsigned long, +// 'arg_type' will be unsigned long; and so on. +// +// Parameterizing Matchers +// ======================= +// +// Sometimes you'll want to parameterize the matcher. For that you +// can use another macro: +// +// MATCHER_P(name, param_name, description_string) { statements; } +// +// For example: +// +// MATCHER_P(HasAbsoluteValue, value, "") { return abs(arg) == value; } +// +// will allow you to write: +// +// EXPECT_THAT(Blah("a"), HasAbsoluteValue(n)); +// +// which may lead to this message (assuming n is 10): +// +// Value of: Blah("a") +// Expected: has absolute value 10 +// Actual: -9 +// +// Note that both the matcher description and its parameter are +// printed, making the message human-friendly. +// +// In the matcher definition body, you can write 'foo_type' to +// reference the type of a parameter named 'foo'. For example, in the +// body of MATCHER_P(HasAbsoluteValue, value) above, you can write +// 'value_type' to refer to the type of 'value'. +// +// We also provide MATCHER_P2, MATCHER_P3, ..., up to MATCHER_P$n to +// support multi-parameter matchers. +// +// Describing Parameterized Matchers +// ================================= +// +// The last argument to MATCHER*() is a string-typed expression. The +// expression can reference all of the matcher's parameters and a +// special bool-typed variable named 'negation'. When 'negation' is +// false, the expression should evaluate to the matcher's description; +// otherwise it should evaluate to the description of the negation of +// the matcher. For example, +// +// using testing::PrintToString; +// +// MATCHER_P2(InClosedRange, low, hi, +// string(negation ? "is not" : "is") + " in range [" + +// PrintToString(low) + ", " + PrintToString(hi) + "]") { +// return low <= arg && arg <= hi; +// } +// ... 
+// EXPECT_THAT(3, InClosedRange(4, 6)); +// EXPECT_THAT(3, Not(InClosedRange(2, 4))); +// +// would generate two failures that contain the text: +// +// Expected: is in range [4, 6] +// ... +// Expected: is not in range [2, 4] +// +// If you specify "" as the description, the failure message will +// contain the sequence of words in the matcher name followed by the +// parameter values printed as a tuple. For example, +// +// MATCHER_P2(InClosedRange, low, hi, "") { ... } +// ... +// EXPECT_THAT(3, InClosedRange(4, 6)); +// EXPECT_THAT(3, Not(InClosedRange(2, 4))); +// +// would generate two failures that contain the text: +// +// Expected: in closed range (4, 6) +// ... +// Expected: not (in closed range (2, 4)) +// +// Types of Matcher Parameters +// =========================== +// +// For the purpose of typing, you can view +// +// MATCHER_Pk(Foo, p1, ..., pk, description_string) { ... } +// +// as shorthand for +// +// template <typename p1_type, ..., typename pk_type> +// FooMatcherPk<p1_type, ..., pk_type> +// Foo(p1_type p1, ..., pk_type pk) { ... } +// +// When you write Foo(v1, ..., vk), the compiler infers the types of +// the parameters v1, ..., and vk for you. If you are not happy with +// the result of the type inference, you can specify the types by +// explicitly instantiating the template, as in Foo<long, bool>(5, +// false). As said earlier, you don't get to (or need to) specify +// 'arg_type' as that's determined by the context in which the matcher +// is used. You can assign the result of expression Foo(p1, ..., pk) +// to a variable of type FooMatcherPk<p1_type, ..., pk_type>. This +// can be useful when composing matchers. +// +// While you can instantiate a matcher template with reference types, +// passing the parameters by pointer usually makes your code more +// readable. If, however, you still want to pass a parameter by +// reference, be aware that in the failure message generated by the +// matcher you will see the value of the referenced object but not its +// address.
+// +// Explaining Match Results +// ======================== +// +// Sometimes the matcher description alone isn't enough to explain why +// the match has failed or succeeded. For example, when expecting a +// long string, it can be very helpful to also print the diff between +// the expected string and the actual one. To achieve that, you can +// optionally stream additional information to a special variable +// named result_listener, whose type is a pointer to class +// MatchResultListener: +// +// MATCHER_P(EqualsLongString, str, "") { +// if (arg == str) return true; +// +// *result_listener << "the difference: " +// << DiffStrings(str, arg); +// return false; +// } +// +// Overloading Matchers +// ==================== +// +// You can overload matchers with different numbers of parameters: +// +// MATCHER_P(Blah, a, description_string1) { ... } +// MATCHER_P2(Blah, a, b, description_string2) { ... } +// +// Caveats +// ======= +// +// When defining a new matcher, you should also consider implementing +// MatcherInterface or using MakePolymorphicMatcher(). These +// approaches require more work than the MATCHER* macros, but also +// give you more control over the types of the value being matched and +// the matcher parameters, which may lead to better compiler error +// messages when the matcher is used incorrectly. They also allow +// overloading matchers based on parameter types (as opposed to just +// based on the number of parameters). +// +// MATCHER*() can only be used in a namespace scope. The reason is +// that C++ doesn't yet allow function-local types to be used to +// instantiate templates. The up-coming C++0x standard will fix this. +// Once that's done, we'll consider supporting using MATCHER*() inside +// a function.
+// +// More Information +// ================ +// +// To learn more about using these macros, please search for 'MATCHER' +// on https://github.com/google/googletest/blob/master/googlemock/docs/CookBook.md + +$range i 0..n +$for i + +[[ +$var macro_name = [[$if i==0 [[MATCHER]] $elif i==1 [[MATCHER_P]] + $else [[MATCHER_P$i]]]] +$var class_name = [[name##Matcher[[$if i==0 [[]] $elif i==1 [[P]] + $else [[P$i]]]]]] +$range j 0..i-1 +$var template = [[$if i==0 [[]] $else [[ + + template <$for j, [[typename p$j##_type]]>\ +]]]] +$var ctor_param_list = [[$for j, [[p$j##_type gmock_p$j]]]] +$var impl_ctor_param_list = [[$for j, [[p$j##_type gmock_p$j]]]] +$var impl_inits = [[$if i==0 [[]] $else [[ : $for j, [[p$j(gmock_p$j)]]]]]] +$var inits = [[$if i==0 [[]] $else [[ : $for j, [[p$j(gmock_p$j)]]]]]] +$var params = [[$for j, [[p$j]]]] +$var param_types = [[$if i==0 [[]] $else [[<$for j, [[p$j##_type]]>]]]] +$var param_types_and_names = [[$for j, [[p$j##_type p$j]]]] +$var param_field_decls = [[$for j +[[ + + p$j##_type p$j;\ +]]]] +$var param_field_decls2 = [[$for j +[[ + + p$j##_type p$j;\ +]]]] + +#define $macro_name(name$for j [[, p$j]], description)\$template + class $class_name {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface {\ + public:\ + [[$if i==1 [[explicit ]]]]gmock_Impl($impl_ctor_param_list)\ + $impl_inits {}\ + virtual bool MatchAndExplain(\ + arg_type arg, ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\$param_field_decls + private:\ + ::testing::internal::string FormatDescription(bool negation) const {\ + const ::testing::internal::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return 
::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::testing::tuple<$for j, [[p$j##_type]]>($for j, [[p$j]])));\ + }\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl($params));\ + }\ + [[$if i==1 [[explicit ]]]]$class_name($ctor_param_list)$inits {\ + }\$param_field_decls2 + private:\ + GTEST_DISALLOW_ASSIGN_($class_name);\ + };\$template + inline $class_name$param_types name($param_types_and_names) {\ + return $class_name$param_types($params);\ + }\$template + template \ + bool $class_name$param_types::gmock_Impl::MatchAndExplain(\ + arg_type arg, \ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const +]] + + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ diff --git a/tools/external/googletest/googlemock/include/gmock/gmock-generated-nice-strict.h b/tools/external/googletest/googlemock/include/gmock/gmock-generated-nice-strict.h new file mode 100644 index 00000000..4095f4d5 --- /dev/null +++ b/tools/external/googletest/googlemock/include/gmock/gmock-generated-nice-strict.h @@ -0,0 +1,397 @@ +// This file was GENERATED by command: +// pump.py gmock-generated-nice-strict.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Implements class templates NiceMock, NaggyMock, and StrictMock. +// +// Given a mock class MockFoo that is created using Google Mock, +// NiceMock<MockFoo> is a subclass of MockFoo that allows +// uninteresting calls (i.e. calls to mock methods that have no +// EXPECT_CALL specs), NaggyMock<MockFoo> is a subclass of MockFoo +// that prints a warning when an uninteresting call occurs, and +// StrictMock<MockFoo> is a subclass of MockFoo that treats all +// uninteresting calls as errors. +// +// Currently a mock is naggy by default, so MockFoo and +// NaggyMock<MockFoo> behave the same. However, we will soon +// switch the default behavior of mocks to be nice, as that in general +// leads to more maintainable tests. When that happens, MockFoo will +// stop behaving like NaggyMock<MockFoo> and start behaving like +// NiceMock<MockFoo>. +// +// NiceMock, NaggyMock, and StrictMock "inherit" the constructors of +// their respective base class, with up-to 10 arguments.
Therefore +// you can write NiceMock(5, "a") to construct a nice mock +// where MockFoo has a constructor that accepts (int, const char*), +// for example. +// +// A known limitation is that NiceMock, NaggyMock, +// and StrictMock only works for mock methods defined using +// the MOCK_METHOD* family of macros DIRECTLY in the MockFoo class. +// If a mock method is defined in a base class of MockFoo, the "nice" +// or "strict" modifier may not affect it, depending on the compiler. +// In particular, nesting NiceMock, NaggyMock, and StrictMock is NOT +// supported. +// +// Another known limitation is that the constructors of the base mock +// cannot have arguments passed by non-const reference, which are +// banned by the Google C++ style guide anyway. + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_NICE_STRICT_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_NICE_STRICT_H_ + +#include "gmock/gmock-spec-builders.h" +#include "gmock/internal/gmock-port.h" + +namespace testing { + +template +class NiceMock : public MockClass { + public: + // We don't factor out the constructor body to a common method, as + // we have to avoid a possible clash with members of MockClass. + NiceMock() { + ::testing::Mock::AllowUninterestingCalls( + internal::ImplicitCast_(this)); + } + + // C++ doesn't (yet) allow inheritance of constructors, so we have + // to define it for each arity. 
+ template + explicit NiceMock(const A1& a1) : MockClass(a1) { + ::testing::Mock::AllowUninterestingCalls( + internal::ImplicitCast_(this)); + } + template + NiceMock(const A1& a1, const A2& a2) : MockClass(a1, a2) { + ::testing::Mock::AllowUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NiceMock(const A1& a1, const A2& a2, const A3& a3) : MockClass(a1, a2, a3) { + ::testing::Mock::AllowUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NiceMock(const A1& a1, const A2& a2, const A3& a3, + const A4& a4) : MockClass(a1, a2, a3, a4) { + ::testing::Mock::AllowUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NiceMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5) : MockClass(a1, a2, a3, a4, a5) { + ::testing::Mock::AllowUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NiceMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6) : MockClass(a1, a2, a3, a4, a5, a6) { + ::testing::Mock::AllowUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NiceMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6, const A7& a7) : MockClass(a1, a2, a3, a4, a5, + a6, a7) { + ::testing::Mock::AllowUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NiceMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6, const A7& a7, const A8& a8) : MockClass(a1, + a2, a3, a4, a5, a6, a7, a8) { + ::testing::Mock::AllowUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NiceMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6, const A7& a7, const A8& a8, + const A9& a9) : MockClass(a1, a2, a3, a4, a5, a6, a7, a8, a9) { + ::testing::Mock::AllowUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NiceMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + 
const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, + const A10& a10) : MockClass(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10) { + ::testing::Mock::AllowUninterestingCalls( + internal::ImplicitCast_(this)); + } + + virtual ~NiceMock() { + ::testing::Mock::UnregisterCallReaction( + internal::ImplicitCast_(this)); + } + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(NiceMock); +}; + +template +class NaggyMock : public MockClass { + public: + // We don't factor out the constructor body to a common method, as + // we have to avoid a possible clash with members of MockClass. + NaggyMock() { + ::testing::Mock::WarnUninterestingCalls( + internal::ImplicitCast_(this)); + } + + // C++ doesn't (yet) allow inheritance of constructors, so we have + // to define it for each arity. + template + explicit NaggyMock(const A1& a1) : MockClass(a1) { + ::testing::Mock::WarnUninterestingCalls( + internal::ImplicitCast_(this)); + } + template + NaggyMock(const A1& a1, const A2& a2) : MockClass(a1, a2) { + ::testing::Mock::WarnUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NaggyMock(const A1& a1, const A2& a2, const A3& a3) : MockClass(a1, a2, a3) { + ::testing::Mock::WarnUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NaggyMock(const A1& a1, const A2& a2, const A3& a3, + const A4& a4) : MockClass(a1, a2, a3, a4) { + ::testing::Mock::WarnUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NaggyMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5) : MockClass(a1, a2, a3, a4, a5) { + ::testing::Mock::WarnUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NaggyMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6) : MockClass(a1, a2, a3, a4, a5, a6) { + ::testing::Mock::WarnUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NaggyMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, 
const A6& a6, const A7& a7) : MockClass(a1, a2, a3, a4, a5, + a6, a7) { + ::testing::Mock::WarnUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NaggyMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6, const A7& a7, const A8& a8) : MockClass(a1, + a2, a3, a4, a5, a6, a7, a8) { + ::testing::Mock::WarnUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NaggyMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6, const A7& a7, const A8& a8, + const A9& a9) : MockClass(a1, a2, a3, a4, a5, a6, a7, a8, a9) { + ::testing::Mock::WarnUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + NaggyMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, + const A10& a10) : MockClass(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10) { + ::testing::Mock::WarnUninterestingCalls( + internal::ImplicitCast_(this)); + } + + virtual ~NaggyMock() { + ::testing::Mock::UnregisterCallReaction( + internal::ImplicitCast_(this)); + } + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(NaggyMock); +}; + +template +class StrictMock : public MockClass { + public: + // We don't factor out the constructor body to a common method, as + // we have to avoid a possible clash with members of MockClass. + StrictMock() { + ::testing::Mock::FailUninterestingCalls( + internal::ImplicitCast_(this)); + } + + // C++ doesn't (yet) allow inheritance of constructors, so we have + // to define it for each arity. 
+ template + explicit StrictMock(const A1& a1) : MockClass(a1) { + ::testing::Mock::FailUninterestingCalls( + internal::ImplicitCast_(this)); + } + template + StrictMock(const A1& a1, const A2& a2) : MockClass(a1, a2) { + ::testing::Mock::FailUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + StrictMock(const A1& a1, const A2& a2, const A3& a3) : MockClass(a1, a2, a3) { + ::testing::Mock::FailUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + StrictMock(const A1& a1, const A2& a2, const A3& a3, + const A4& a4) : MockClass(a1, a2, a3, a4) { + ::testing::Mock::FailUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + StrictMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5) : MockClass(a1, a2, a3, a4, a5) { + ::testing::Mock::FailUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + StrictMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6) : MockClass(a1, a2, a3, a4, a5, a6) { + ::testing::Mock::FailUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + StrictMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6, const A7& a7) : MockClass(a1, a2, a3, a4, a5, + a6, a7) { + ::testing::Mock::FailUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + StrictMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6, const A7& a7, const A8& a8) : MockClass(a1, + a2, a3, a4, a5, a6, a7, a8) { + ::testing::Mock::FailUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + StrictMock(const A1& a1, const A2& a2, const A3& a3, const A4& a4, + const A5& a5, const A6& a6, const A7& a7, const A8& a8, + const A9& a9) : MockClass(a1, a2, a3, a4, a5, a6, a7, a8, a9) { + ::testing::Mock::FailUninterestingCalls( + internal::ImplicitCast_(this)); + } + + template + StrictMock(const A1& a1, const A2& a2, const A3& a3, const A4& 
a4, + const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, + const A10& a10) : MockClass(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10) { + ::testing::Mock::FailUninterestingCalls( + internal::ImplicitCast_(this)); + } + + virtual ~StrictMock() { + ::testing::Mock::UnregisterCallReaction( + internal::ImplicitCast_(this)); + } + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(StrictMock); +}; + +// The following specializations catch some (relatively more common) +// user errors of nesting nice and strict mocks. They do NOT catch +// all possible errors. + +// These specializations are declared but not defined, as NiceMock, +// NaggyMock, and StrictMock cannot be nested. + +template +class NiceMock >; +template +class NiceMock >; +template +class NiceMock >; + +template +class NaggyMock >; +template +class NaggyMock >; +template +class NaggyMock >; + +template +class StrictMock >; +template +class StrictMock >; +template +class StrictMock >; + +} // namespace testing + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_NICE_STRICT_H_ diff --git a/tools/external/googletest/googlemock/include/gmock/gmock-generated-nice-strict.h.pump b/tools/external/googletest/googlemock/include/gmock/gmock-generated-nice-strict.h.pump new file mode 100644 index 00000000..3ee1ce7f --- /dev/null +++ b/tools/external/googletest/googlemock/include/gmock/gmock-generated-nice-strict.h.pump @@ -0,0 +1,161 @@ +$$ -*- mode: c++; -*- +$$ This is a Pump source file. Please use Pump to convert it to +$$ gmock-generated-nice-strict.h. +$$ +$var n = 10 $$ The maximum arity we support. +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Implements class templates NiceMock, NaggyMock, and StrictMock. +// +// Given a mock class MockFoo that is created using Google Mock, +// NiceMock<MockFoo> is a subclass of MockFoo that allows +// uninteresting calls (i.e. calls to mock methods that have no +// EXPECT_CALL specs), NaggyMock<MockFoo> is a subclass of MockFoo +// that prints a warning when an uninteresting call occurs, and +// StrictMock<MockFoo> is a subclass of MockFoo that treats all +// uninteresting calls as errors. +// +// Currently a mock is naggy by default, so MockFoo and +// NaggyMock<MockFoo> behave the same. However, we will soon +// switch the default behavior of mocks to be nice, as that in general +// leads to more maintainable tests. 
When that happens, MockFoo will +// stop behaving like NaggyMock<MockFoo> and start behaving like +// NiceMock<MockFoo>. +// +// NiceMock, NaggyMock, and StrictMock "inherit" the constructors of +// their respective base class, with up-to $n arguments. Therefore +// you can write NiceMock<MockFoo>(5, "a") to construct a nice mock +// where MockFoo has a constructor that accepts (int, const char*), +// for example. +// +// A known limitation is that NiceMock<MockFoo>, NaggyMock<MockFoo>, +// and StrictMock<MockFoo> only work for mock methods defined using +// the MOCK_METHOD* family of macros DIRECTLY in the MockFoo class. +// If a mock method is defined in a base class of MockFoo, the "nice" +// or "strict" modifier may not affect it, depending on the compiler. +// In particular, nesting NiceMock, NaggyMock, and StrictMock is NOT +// supported. +// +// Another known limitation is that the constructors of the base mock +// cannot have arguments passed by non-const reference, which are +// banned by the Google C++ style guide anyway. + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_NICE_STRICT_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_NICE_STRICT_H_ + +#include "gmock/gmock-spec-builders.h" +#include "gmock/internal/gmock-port.h" + +namespace testing { + +$range kind 0..2 +$for kind [[ + +$var clazz=[[$if kind==0 [[NiceMock]] + $elif kind==1 [[NaggyMock]] + $else [[StrictMock]]]] + +$var method=[[$if kind==0 [[AllowUninterestingCalls]] + $elif kind==1 [[WarnUninterestingCalls]] + $else [[FailUninterestingCalls]]]] + +template +class $clazz : public MockClass { + public: + // We don't factor out the constructor body to a common method, as + // we have to avoid a possible clash with members of MockClass. + $clazz() { + ::testing::Mock::$method( + internal::ImplicitCast_(this)); + } + + // C++ doesn't (yet) allow inheritance of constructors, so we have + // to define it for each arity. 
+ template + explicit $clazz(const A1& a1) : MockClass(a1) { + ::testing::Mock::$method( + internal::ImplicitCast_(this)); + } + +$range i 2..n +$for i [[ +$range j 1..i + template <$for j, [[typename A$j]]> + $clazz($for j, [[const A$j& a$j]]) : MockClass($for j, [[a$j]]) { + ::testing::Mock::$method( + internal::ImplicitCast_(this)); + } + + +]] + virtual ~$clazz() { + ::testing::Mock::UnregisterCallReaction( + internal::ImplicitCast_(this)); + } + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_($clazz); +}; + +]] + +// The following specializations catch some (relatively more common) +// user errors of nesting nice and strict mocks. They do NOT catch +// all possible errors. + +// These specializations are declared but not defined, as NiceMock, +// NaggyMock, and StrictMock cannot be nested. + +template +class NiceMock >; +template +class NiceMock >; +template +class NiceMock >; + +template +class NaggyMock >; +template +class NaggyMock >; +template +class NaggyMock >; + +template +class StrictMock >; +template +class StrictMock >; +template +class StrictMock >; + +} // namespace testing + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_NICE_STRICT_H_ diff --git a/tools/external/googletest/googlemock/include/gmock/gmock-matchers.h b/tools/external/googletest/googlemock/include/gmock/gmock-matchers.h new file mode 100644 index 00000000..c446bf7d --- /dev/null +++ b/tools/external/googletest/googlemock/include/gmock/gmock-matchers.h @@ -0,0 +1,4416 @@ +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used argument matchers. More +// matchers can be defined by the user implementing the +// MatcherInterface interface if necessary. + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_ + +#include +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include + +#include "gmock/internal/gmock-internal-utils.h" +#include "gmock/internal/gmock-port.h" +#include "gtest/gtest.h" + +#if GTEST_HAS_STD_INITIALIZER_LIST_ +# include // NOLINT -- must be after gtest.h +#endif + +namespace testing { + +// To implement a matcher Foo for type T, define: +// 1. 
a class FooMatcherImpl that implements the +// MatcherInterface interface, and +// 2. a factory function that creates a Matcher object from a +// FooMatcherImpl*. +// +// The two-level delegation design makes it possible to allow a user +// to write "v" instead of "Eq(v)" where a Matcher is expected, which +// is impossible if we pass matchers by pointers. It also eases +// ownership management as Matcher objects can now be copied like +// plain values. + +// MatchResultListener is an abstract class. Its << operator can be +// used by a matcher to explain why a value matches or doesn't match. +// +// TODO(wan@google.com): add method +// bool InterestedInWhy(bool result) const; +// to indicate whether the listener is interested in why the match +// result is 'result'. +class MatchResultListener { + public: + // Creates a listener object with the given underlying ostream. The + // listener does not own the ostream, and does not dereference it + // in the constructor or destructor. + explicit MatchResultListener(::std::ostream* os) : stream_(os) {} + virtual ~MatchResultListener() = 0; // Makes this class abstract. + + // Streams x to the underlying ostream; does nothing if the ostream + // is NULL. + template + MatchResultListener& operator<<(const T& x) { + if (stream_ != NULL) + *stream_ << x; + return *this; + } + + // Returns the underlying ostream. + ::std::ostream* stream() { return stream_; } + + // Returns true iff the listener is interested in an explanation of + // the match result. A matcher's MatchAndExplain() method can use + // this information to avoid generating the explanation when no one + // intends to hear it. + bool IsInterested() const { return stream_ != NULL; } + + private: + ::std::ostream* const stream_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(MatchResultListener); +}; + +inline MatchResultListener::~MatchResultListener() { +} + +// An instance of a subclass of this knows how to describe itself as a +// matcher. 
+class MatcherDescriberInterface { + public: + virtual ~MatcherDescriberInterface() {} + + // Describes this matcher to an ostream. The function should print + // a verb phrase that describes the property a value matching this + // matcher should have. The subject of the verb phrase is the value + // being matched. For example, the DescribeTo() method of the Gt(7) + // matcher prints "is greater than 7". + virtual void DescribeTo(::std::ostream* os) const = 0; + + // Describes the negation of this matcher to an ostream. For + // example, if the description of this matcher is "is greater than + // 7", the negated description could be "is not greater than 7". + // You are not required to override this when implementing + // MatcherInterface, but it is highly advised so that your matcher + // can produce good error messages. + virtual void DescribeNegationTo(::std::ostream* os) const { + *os << "not ("; + DescribeTo(os); + *os << ")"; + } +}; + +// The implementation of a matcher. +template +class MatcherInterface : public MatcherDescriberInterface { + public: + // Returns true iff the matcher matches x; also explains the match + // result to 'listener' if necessary (see the next paragraph), in + // the form of a non-restrictive relative clause ("which ...", + // "whose ...", etc) that describes x. For example, the + // MatchAndExplain() method of the Pointee(...) matcher should + // generate an explanation like "which points to ...". + // + // Implementations of MatchAndExplain() should add an explanation of + // the match result *if and only if* they can provide additional + // information that's not already present (or not obvious) in the + // print-out of x and the matcher's description. Whether the match + // succeeds is not a factor in deciding whether an explanation is + // needed, as sometimes the caller needs to print a failure message + // when the match succeeds (e.g. when the matcher is used inside + // Not()). 
+ // + // For example, a "has at least 10 elements" matcher should explain + // what the actual element count is, regardless of the match result, + // as it is useful information to the reader; on the other hand, an + // "is empty" matcher probably only needs to explain what the actual + // size is when the match fails, as it's redundant to say that the + // size is 0 when the value is already known to be empty. + // + // You should override this method when defining a new matcher. + // + // It's the responsibility of the caller (Google Mock) to guarantee + // that 'listener' is not NULL. This helps to simplify a matcher's + // implementation when it doesn't care about the performance, as it + // can talk to 'listener' without checking its validity first. + // However, in order to implement dummy listeners efficiently, + // listener->stream() may be NULL. + virtual bool MatchAndExplain(T x, MatchResultListener* listener) const = 0; + + // Inherits these methods from MatcherDescriberInterface: + // virtual void DescribeTo(::std::ostream* os) const = 0; + // virtual void DescribeNegationTo(::std::ostream* os) const; +}; + +// A match result listener that stores the explanation in a string. +class StringMatchResultListener : public MatchResultListener { + public: + StringMatchResultListener() : MatchResultListener(&ss_) {} + + // Returns the explanation accumulated so far. + std::string str() const { return ss_.str(); } + + // Clears the explanation accumulated so far. 
+ void Clear() { ss_.str(""); } + + private: + ::std::stringstream ss_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(StringMatchResultListener); +}; + +namespace internal { + +struct AnyEq { + template + bool operator()(const A& a, const B& b) const { return a == b; } +}; +struct AnyNe { + template + bool operator()(const A& a, const B& b) const { return a != b; } +}; +struct AnyLt { + template + bool operator()(const A& a, const B& b) const { return a < b; } +}; +struct AnyGt { + template + bool operator()(const A& a, const B& b) const { return a > b; } +}; +struct AnyLe { + template + bool operator()(const A& a, const B& b) const { return a <= b; } +}; +struct AnyGe { + template + bool operator()(const A& a, const B& b) const { return a >= b; } +}; + +// A match result listener that ignores the explanation. +class DummyMatchResultListener : public MatchResultListener { + public: + DummyMatchResultListener() : MatchResultListener(NULL) {} + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(DummyMatchResultListener); +}; + +// A match result listener that forwards the explanation to a given +// ostream. The difference between this and MatchResultListener is +// that the former is concrete. +class StreamMatchResultListener : public MatchResultListener { + public: + explicit StreamMatchResultListener(::std::ostream* os) + : MatchResultListener(os) {} + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamMatchResultListener); +}; + +// An internal class for implementing Matcher, which will derive +// from it. We put functionalities common to all Matcher +// specializations here to avoid code duplication. +template +class MatcherBase { + public: + // Returns true iff the matcher matches x; also explains the match + // result to 'listener'. + bool MatchAndExplain(T x, MatchResultListener* listener) const { + return impl_->MatchAndExplain(x, listener); + } + + // Returns true iff this matcher matches x. 
+ bool Matches(T x) const { + DummyMatchResultListener dummy; + return MatchAndExplain(x, &dummy); + } + + // Describes this matcher to an ostream. + void DescribeTo(::std::ostream* os) const { impl_->DescribeTo(os); } + + // Describes the negation of this matcher to an ostream. + void DescribeNegationTo(::std::ostream* os) const { + impl_->DescribeNegationTo(os); + } + + // Explains why x matches, or doesn't match, the matcher. + void ExplainMatchResultTo(T x, ::std::ostream* os) const { + StreamMatchResultListener listener(os); + MatchAndExplain(x, &listener); + } + + // Returns the describer for this matcher object; retains ownership + // of the describer, which is only guaranteed to be alive when + // this matcher object is alive. + const MatcherDescriberInterface* GetDescriber() const { + return impl_.get(); + } + + protected: + MatcherBase() {} + + // Constructs a matcher from its implementation. + explicit MatcherBase(const MatcherInterface* impl) + : impl_(impl) {} + + virtual ~MatcherBase() {} + + private: + // shared_ptr (util/gtl/shared_ptr.h) and linked_ptr have similar + // interfaces. The former dynamically allocates a chunk of memory + // to hold the reference count, while the latter tracks all + // references using a circular linked list without allocating + // memory. It has been observed that linked_ptr performs better in + // typical scenarios. However, shared_ptr can out-perform + // linked_ptr when there are many more uses of the copy constructor + // than the default constructor. + // + // If performance becomes a problem, we should see if using + // shared_ptr helps. + ::testing::internal::linked_ptr > impl_; +}; + +} // namespace internal + +// A Matcher is a copyable and IMMUTABLE (except by assignment) +// object that can check whether a value of type T matches. The +// implementation of Matcher is just a linked_ptr to const +// MatcherInterface, so copying is fairly cheap. Don't inherit +// from Matcher! 
+template +class Matcher : public internal::MatcherBase { + public: + // Constructs a null matcher. Needed for storing Matcher objects in STL + // containers. A default-constructed matcher is not yet initialized. You + // cannot use it until a valid value has been assigned to it. + explicit Matcher() {} // NOLINT + + // Constructs a matcher from its implementation. + explicit Matcher(const MatcherInterface* impl) + : internal::MatcherBase(impl) {} + + // Implicit constructor here allows people to write + // EXPECT_CALL(foo, Bar(5)) instead of EXPECT_CALL(foo, Bar(Eq(5))) sometimes + Matcher(T value); // NOLINT +}; + +// The following two specializations allow the user to write str +// instead of Eq(str) and "foo" instead of Eq("foo") when a string +// matcher is expected. +template <> +class GTEST_API_ Matcher + : public internal::MatcherBase { + public: + Matcher() {} + + explicit Matcher(const MatcherInterface* impl) + : internal::MatcherBase(impl) {} + + // Allows the user to write str instead of Eq(str) sometimes, where + // str is a string object. + Matcher(const internal::string& s); // NOLINT + + // Allows the user to write "foo" instead of Eq("foo") sometimes. + Matcher(const char* s); // NOLINT +}; + +template <> +class GTEST_API_ Matcher + : public internal::MatcherBase { + public: + Matcher() {} + + explicit Matcher(const MatcherInterface* impl) + : internal::MatcherBase(impl) {} + + // Allows the user to write str instead of Eq(str) sometimes, where + // str is a string object. + Matcher(const internal::string& s); // NOLINT + + // Allows the user to write "foo" instead of Eq("foo") sometimes. + Matcher(const char* s); // NOLINT +}; + +#if GTEST_HAS_STRING_PIECE_ +// The following two specializations allow the user to write str +// instead of Eq(str) and "foo" instead of Eq("foo") when a StringPiece +// matcher is expected. 
+template <> +class GTEST_API_ Matcher + : public internal::MatcherBase { + public: + Matcher() {} + + explicit Matcher(const MatcherInterface* impl) + : internal::MatcherBase(impl) {} + + // Allows the user to write str instead of Eq(str) sometimes, where + // str is a string object. + Matcher(const internal::string& s); // NOLINT + + // Allows the user to write "foo" instead of Eq("foo") sometimes. + Matcher(const char* s); // NOLINT + + // Allows the user to pass StringPieces directly. + Matcher(StringPiece s); // NOLINT +}; + +template <> +class GTEST_API_ Matcher + : public internal::MatcherBase { + public: + Matcher() {} + + explicit Matcher(const MatcherInterface* impl) + : internal::MatcherBase(impl) {} + + // Allows the user to write str instead of Eq(str) sometimes, where + // str is a string object. + Matcher(const internal::string& s); // NOLINT + + // Allows the user to write "foo" instead of Eq("foo") sometimes. + Matcher(const char* s); // NOLINT + + // Allows the user to pass StringPieces directly. + Matcher(StringPiece s); // NOLINT +}; +#endif // GTEST_HAS_STRING_PIECE_ + +// The PolymorphicMatcher class template makes it easy to implement a +// polymorphic matcher (i.e. a matcher that can match values of more +// than one type, e.g. Eq(n) and NotNull()). +// +// To define a polymorphic matcher, a user should provide an Impl +// class that has a DescribeTo() method and a DescribeNegationTo() +// method, and define a member function (or member function template) +// +// bool MatchAndExplain(const Value& value, +// MatchResultListener* listener) const; +// +// See the definition of NotNull() for a complete example. +template +class PolymorphicMatcher { + public: + explicit PolymorphicMatcher(const Impl& an_impl) : impl_(an_impl) {} + + // Returns a mutable reference to the underlying matcher + // implementation object. + Impl& mutable_impl() { return impl_; } + + // Returns an immutable reference to the underlying matcher + // implementation object. 
+ const Impl& impl() const { return impl_; } + + template + operator Matcher() const { + return Matcher(new MonomorphicImpl(impl_)); + } + + private: + template + class MonomorphicImpl : public MatcherInterface { + public: + explicit MonomorphicImpl(const Impl& impl) : impl_(impl) {} + + virtual void DescribeTo(::std::ostream* os) const { + impl_.DescribeTo(os); + } + + virtual void DescribeNegationTo(::std::ostream* os) const { + impl_.DescribeNegationTo(os); + } + + virtual bool MatchAndExplain(T x, MatchResultListener* listener) const { + return impl_.MatchAndExplain(x, listener); + } + + private: + const Impl impl_; + + GTEST_DISALLOW_ASSIGN_(MonomorphicImpl); + }; + + Impl impl_; + + GTEST_DISALLOW_ASSIGN_(PolymorphicMatcher); +}; + +// Creates a matcher from its implementation. This is easier to use +// than the Matcher constructor as it doesn't require you to +// explicitly write the template argument, e.g. +// +// MakeMatcher(foo); +// vs +// Matcher(foo); +template +inline Matcher MakeMatcher(const MatcherInterface* impl) { + return Matcher(impl); +} + +// Creates a polymorphic matcher from its implementation. This is +// easier to use than the PolymorphicMatcher constructor as it +// doesn't require you to explicitly write the template argument, e.g. +// +// MakePolymorphicMatcher(foo); +// vs +// PolymorphicMatcher(foo); +template +inline PolymorphicMatcher MakePolymorphicMatcher(const Impl& impl) { + return PolymorphicMatcher(impl); +} + +// Anything inside the 'internal' namespace IS INTERNAL IMPLEMENTATION +// and MUST NOT BE USED IN USER CODE!!! +namespace internal { + +// The MatcherCastImpl class template is a helper for implementing +// MatcherCast(). We need this helper in order to partially +// specialize the implementation of MatcherCast() (C++ allows +// class/struct templates to be partially specialized, but not +// function templates.). + +// This general version is used when MatcherCast()'s argument is a +// polymorphic matcher (i.e. 
something that can be converted to a +// Matcher but is not one yet; for example, Eq(value)) or a value (for +// example, "hello"). +template +class MatcherCastImpl { + public: + static Matcher Cast(const M& polymorphic_matcher_or_value) { + // M can be a polymorphic matcher, in which case we want to use + // its conversion operator to create Matcher<T>. Or it can be a value + // that should be passed to the Matcher<T>'s constructor. + // + // We can't call Matcher<T>(polymorphic_matcher_or_value) when M is a + // polymorphic matcher because it'll be ambiguous if T has an implicit + // constructor from M (this usually happens when T has an implicit + // constructor from any type). + // + // It won't work to unconditionally implicit_cast + // polymorphic_matcher_or_value to Matcher<T> because it won't trigger + // a user-defined conversion from M to T if one exists (assuming M is + // a value). + return CastImpl( + polymorphic_matcher_or_value, + BooleanConstant< + internal::ImplicitlyConvertible >::value>()); + } + + private: + static Matcher CastImpl(const M& value, BooleanConstant) { + // M can't be implicitly converted to Matcher<T>, so M isn't a polymorphic + // matcher. It must be a value then. Use direct initialization to create + // a matcher. + return Matcher(ImplicitCast_(value)); + } + + static Matcher CastImpl(const M& polymorphic_matcher_or_value, + BooleanConstant) { + // M is implicitly convertible to Matcher<T>, which means that either + // M is a polymorphic matcher or Matcher<T> has an implicit constructor + // from M. In both cases using the implicit conversion will produce a + // matcher. + // + // Even if T has an implicit constructor from M, it won't be called because + // creating Matcher<T> would require a chain of two user-defined conversions + // (first to create T from M and then to create Matcher<T> from T). + return polymorphic_matcher_or_value; + } +}; + +// This more specialized version is used when MatcherCast()'s argument +// is already a Matcher. 
This only compiles when type T can be +// statically converted to type U. +template +class MatcherCastImpl > { + public: + static Matcher Cast(const Matcher& source_matcher) { + return Matcher(new Impl(source_matcher)); + } + + private: + class Impl : public MatcherInterface { + public: + explicit Impl(const Matcher& source_matcher) + : source_matcher_(source_matcher) {} + + // We delegate the matching logic to the source matcher. + virtual bool MatchAndExplain(T x, MatchResultListener* listener) const { + return source_matcher_.MatchAndExplain(static_cast(x), listener); + } + + virtual void DescribeTo(::std::ostream* os) const { + source_matcher_.DescribeTo(os); + } + + virtual void DescribeNegationTo(::std::ostream* os) const { + source_matcher_.DescribeNegationTo(os); + } + + private: + const Matcher source_matcher_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; +}; + +// This even more specialized version is used for efficiently casting +// a matcher to its own type. +template +class MatcherCastImpl > { + public: + static Matcher Cast(const Matcher& matcher) { return matcher; } +}; + +} // namespace internal + +// In order to be safe and clear, casting between different matcher +// types is done explicitly via MatcherCast(m), which takes a +// matcher m and returns a Matcher. It compiles only when T can be +// statically converted to the argument type of m. +template +inline Matcher MatcherCast(const M& matcher) { + return internal::MatcherCastImpl::Cast(matcher); +} + +// Implements SafeMatcherCast(). +// +// We use an intermediate class to do the actual safe casting as Nokia's +// Symbian compiler cannot decide between +// template ... (M) and +// template ... (const Matcher&) +// for function templates but can for member function templates. +template +class SafeMatcherCastImpl { + public: + // This overload handles polymorphic matchers and values only since + // monomorphic matchers are handled by the next one. 
+ template + static inline Matcher Cast(const M& polymorphic_matcher_or_value) { + return internal::MatcherCastImpl::Cast(polymorphic_matcher_or_value); + } + + // This overload handles monomorphic matchers. + // + // In general, if type T can be implicitly converted to type U, we can + // safely convert a Matcher to a Matcher (i.e. Matcher is + // contravariant): just keep a copy of the original Matcher, convert the + // argument from type T to U, and then pass it to the underlying Matcher. + // The only exception is when U is a reference and T is not, as the + // underlying Matcher may be interested in the argument's address, which + // is not preserved in the conversion from T to U. + template + static inline Matcher Cast(const Matcher& matcher) { + // Enforce that T can be implicitly converted to U. + GTEST_COMPILE_ASSERT_((internal::ImplicitlyConvertible::value), + T_must_be_implicitly_convertible_to_U); + // Enforce that we are not converting a non-reference type T to a reference + // type U. + GTEST_COMPILE_ASSERT_( + internal::is_reference::value || !internal::is_reference::value, + cannot_convert_non_reference_arg_to_reference); + // In case both T and U are arithmetic types, enforce that the + // conversion is not lossy. + typedef GTEST_REMOVE_REFERENCE_AND_CONST_(T) RawT; + typedef GTEST_REMOVE_REFERENCE_AND_CONST_(U) RawU; + const bool kTIsOther = GMOCK_KIND_OF_(RawT) == internal::kOther; + const bool kUIsOther = GMOCK_KIND_OF_(RawU) == internal::kOther; + GTEST_COMPILE_ASSERT_( + kTIsOther || kUIsOther || + (internal::LosslessArithmeticConvertible::value), + conversion_of_arithmetic_types_must_be_lossless); + return MatcherCast(matcher); + } +}; + +template +inline Matcher SafeMatcherCast(const M& polymorphic_matcher) { + return SafeMatcherCastImpl::Cast(polymorphic_matcher); +} + +// A() returns a matcher that matches any value of type T. 
+template +Matcher A(); + +// Anything inside the 'internal' namespace IS INTERNAL IMPLEMENTATION +// and MUST NOT BE USED IN USER CODE!!! +namespace internal { + +// If the explanation is not empty, prints it to the ostream. +inline void PrintIfNotEmpty(const std::string& explanation, + ::std::ostream* os) { + if (explanation != "" && os != NULL) { + *os << ", " << explanation; + } +} + +// Returns true if the given type name is easy to read by a human. +// This is used to decide whether printing the type of a value might +// be helpful. +inline bool IsReadableTypeName(const std::string& type_name) { + // We consider a type name readable if it's short or doesn't contain + // a template or function type. + return (type_name.length() <= 20 || + type_name.find_first_of("<(") == std::string::npos); +} + +// Matches the value against the given matcher, prints the value and explains +// the match result to the listener. Returns the match result. +// 'listener' must not be NULL. +// Value cannot be passed by const reference, because some matchers take a +// non-const argument. +template +bool MatchPrintAndExplain(Value& value, const Matcher& matcher, + MatchResultListener* listener) { + if (!listener->IsInterested()) { + // If the listener is not interested, we do not need to construct the + // inner explanation. + return matcher.Matches(value); + } + + StringMatchResultListener inner_listener; + const bool match = matcher.MatchAndExplain(value, &inner_listener); + + UniversalPrint(value, listener->stream()); +#if GTEST_HAS_RTTI + const std::string& type_name = GetTypeName(); + if (IsReadableTypeName(type_name)) + *listener->stream() << " (of type " << type_name << ")"; +#endif + PrintIfNotEmpty(inner_listener.str(), listener->stream()); + + return match; +} + +// An internal helper class for doing compile-time loop on a tuple's +// fields. 
+template +class TuplePrefix { + public: + // TuplePrefix::Matches(matcher_tuple, value_tuple) returns true + // iff the first N fields of matcher_tuple matches the first N + // fields of value_tuple, respectively. + template + static bool Matches(const MatcherTuple& matcher_tuple, + const ValueTuple& value_tuple) { + return TuplePrefix::Matches(matcher_tuple, value_tuple) + && get(matcher_tuple).Matches(get(value_tuple)); + } + + // TuplePrefix::ExplainMatchFailuresTo(matchers, values, os) + // describes failures in matching the first N fields of matchers + // against the first N fields of values. If there is no failure, + // nothing will be streamed to os. + template + static void ExplainMatchFailuresTo(const MatcherTuple& matchers, + const ValueTuple& values, + ::std::ostream* os) { + // First, describes failures in the first N - 1 fields. + TuplePrefix::ExplainMatchFailuresTo(matchers, values, os); + + // Then describes the failure (if any) in the (N - 1)-th (0-based) + // field. + typename tuple_element::type matcher = + get(matchers); + typedef typename tuple_element::type Value; + Value value = get(values); + StringMatchResultListener listener; + if (!matcher.MatchAndExplain(value, &listener)) { + // TODO(wan): include in the message the name of the parameter + // as used in MOCK_METHOD*() when possible. + *os << " Expected arg #" << N - 1 << ": "; + get(matchers).DescribeTo(os); + *os << "\n Actual: "; + // We remove the reference in type Value to prevent the + // universal printer from printing the address of value, which + // isn't interesting to the user most of the time. The + // matcher's MatchAndExplain() method handles the case when + // the address is interesting. + internal::UniversalPrint(value, os); + PrintIfNotEmpty(listener.str(), os); + *os << "\n"; + } + } +}; + +// The base case. 
+template <> +class TuplePrefix<0> { + public: + template + static bool Matches(const MatcherTuple& /* matcher_tuple */, + const ValueTuple& /* value_tuple */) { + return true; + } + + template + static void ExplainMatchFailuresTo(const MatcherTuple& /* matchers */, + const ValueTuple& /* values */, + ::std::ostream* /* os */) {} +}; + +// TupleMatches(matcher_tuple, value_tuple) returns true iff all +// matchers in matcher_tuple match the corresponding fields in +// value_tuple. It is a compiler error if matcher_tuple and +// value_tuple have different number of fields or incompatible field +// types. +template +bool TupleMatches(const MatcherTuple& matcher_tuple, + const ValueTuple& value_tuple) { + // Makes sure that matcher_tuple and value_tuple have the same + // number of fields. + GTEST_COMPILE_ASSERT_(tuple_size::value == + tuple_size::value, + matcher_and_value_have_different_numbers_of_fields); + return TuplePrefix::value>:: + Matches(matcher_tuple, value_tuple); +} + +// Describes failures in matching matchers against values. If there +// is no failure, nothing will be streamed to os. +template +void ExplainMatchFailureTupleTo(const MatcherTuple& matchers, + const ValueTuple& values, + ::std::ostream* os) { + TuplePrefix::value>::ExplainMatchFailuresTo( + matchers, values, os); +} + +// TransformTupleValues and its helper. +// +// TransformTupleValuesHelper hides the internal machinery that +// TransformTupleValues uses to implement a tuple traversal. +template +class TransformTupleValuesHelper { + private: + typedef ::testing::tuple_size TupleSize; + + public: + // For each member of tuple 't', taken in order, evaluates '*out++ = f(t)'. + // Returns the final value of 'out' in case the caller needs it. 
+ static OutIter Run(Func f, const Tuple& t, OutIter out) { + return IterateOverTuple()(f, t, out); + } + + private: + template + struct IterateOverTuple { + OutIter operator() (Func f, const Tup& t, OutIter out) const { + *out++ = f(::testing::get(t)); + return IterateOverTuple()(f, t, out); + } + }; + template + struct IterateOverTuple { + OutIter operator() (Func /* f */, const Tup& /* t */, OutIter out) const { + return out; + } + }; +}; + +// Successively invokes 'f(element)' on each element of the tuple 't', +// appending each result to the 'out' iterator. Returns the final value +// of 'out'. +template +OutIter TransformTupleValues(Func f, const Tuple& t, OutIter out) { + return TransformTupleValuesHelper::Run(f, t, out); +} + +// Implements A(). +template +class AnyMatcherImpl : public MatcherInterface { + public: + virtual bool MatchAndExplain( + T /* x */, MatchResultListener* /* listener */) const { return true; } + virtual void DescribeTo(::std::ostream* os) const { *os << "is anything"; } + virtual void DescribeNegationTo(::std::ostream* os) const { + // This is mostly for completeness' sake, as it's not very useful + // to write Not(A()). However we cannot completely rule out + // such a possibility, and it doesn't hurt to be prepared. + *os << "never matches"; + } +}; + +// Implements _, a matcher that matches any value of any +// type. This is a polymorphic matcher, so we need a template type +// conversion operator to make it appear as a Matcher for any +// type T. +class AnythingMatcher { + public: + template + operator Matcher() const { return A(); } +}; + +// Implements a matcher that compares a given value with a +// pre-supplied value using one of the ==, <=, <, etc, operators. The +// two values being compared don't have to have the same type. +// +// The matcher defined here is polymorphic (for example, Eq(5) can be +// used to match an int, a short, a double, etc).
Therefore we use +// a template type conversion operator in the implementation. +// +// The following template definition assumes that the Rhs parameter is +// a "bare" type (i.e. neither 'const T' nor 'T&'). +template +class ComparisonBase { + public: + explicit ComparisonBase(const Rhs& rhs) : rhs_(rhs) {} + template + operator Matcher() const { + return MakeMatcher(new Impl(rhs_)); + } + + private: + template + class Impl : public MatcherInterface { + public: + explicit Impl(const Rhs& rhs) : rhs_(rhs) {} + virtual bool MatchAndExplain( + Lhs lhs, MatchResultListener* /* listener */) const { + return Op()(lhs, rhs_); + } + virtual void DescribeTo(::std::ostream* os) const { + *os << D::Desc() << " "; + UniversalPrint(rhs_, os); + } + virtual void DescribeNegationTo(::std::ostream* os) const { + *os << D::NegatedDesc() << " "; + UniversalPrint(rhs_, os); + } + private: + Rhs rhs_; + GTEST_DISALLOW_ASSIGN_(Impl); + }; + Rhs rhs_; + GTEST_DISALLOW_ASSIGN_(ComparisonBase); +}; + +template +class EqMatcher : public ComparisonBase, Rhs, AnyEq> { + public: + explicit EqMatcher(const Rhs& rhs) + : ComparisonBase, Rhs, AnyEq>(rhs) { } + static const char* Desc() { return "is equal to"; } + static const char* NegatedDesc() { return "isn't equal to"; } +}; +template +class NeMatcher : public ComparisonBase, Rhs, AnyNe> { + public: + explicit NeMatcher(const Rhs& rhs) + : ComparisonBase, Rhs, AnyNe>(rhs) { } + static const char* Desc() { return "isn't equal to"; } + static const char* NegatedDesc() { return "is equal to"; } +}; +template +class LtMatcher : public ComparisonBase, Rhs, AnyLt> { + public: + explicit LtMatcher(const Rhs& rhs) + : ComparisonBase, Rhs, AnyLt>(rhs) { } + static const char* Desc() { return "is <"; } + static const char* NegatedDesc() { return "isn't <"; } +}; +template +class GtMatcher : public ComparisonBase, Rhs, AnyGt> { + public: + explicit GtMatcher(const Rhs& rhs) + : ComparisonBase, Rhs, AnyGt>(rhs) { } + static const char* Desc() { return 
"is >"; } + static const char* NegatedDesc() { return "isn't >"; } +}; +template +class LeMatcher : public ComparisonBase, Rhs, AnyLe> { + public: + explicit LeMatcher(const Rhs& rhs) + : ComparisonBase, Rhs, AnyLe>(rhs) { } + static const char* Desc() { return "is <="; } + static const char* NegatedDesc() { return "isn't <="; } +}; +template +class GeMatcher : public ComparisonBase, Rhs, AnyGe> { + public: + explicit GeMatcher(const Rhs& rhs) + : ComparisonBase, Rhs, AnyGe>(rhs) { } + static const char* Desc() { return "is >="; } + static const char* NegatedDesc() { return "isn't >="; } +}; + +// Implements the polymorphic IsNull() matcher, which matches any raw or smart +// pointer that is NULL. +class IsNullMatcher { + public: + template + bool MatchAndExplain(const Pointer& p, + MatchResultListener* /* listener */) const { +#if GTEST_LANG_CXX11 + return p == nullptr; +#else // GTEST_LANG_CXX11 + return GetRawPointer(p) == NULL; +#endif // GTEST_LANG_CXX11 + } + + void DescribeTo(::std::ostream* os) const { *os << "is NULL"; } + void DescribeNegationTo(::std::ostream* os) const { + *os << "isn't NULL"; + } +}; + +// Implements the polymorphic NotNull() matcher, which matches any raw or smart +// pointer that is not NULL. +class NotNullMatcher { + public: + template + bool MatchAndExplain(const Pointer& p, + MatchResultListener* /* listener */) const { +#if GTEST_LANG_CXX11 + return p != nullptr; +#else // GTEST_LANG_CXX11 + return GetRawPointer(p) != NULL; +#endif // GTEST_LANG_CXX11 + } + + void DescribeTo(::std::ostream* os) const { *os << "isn't NULL"; } + void DescribeNegationTo(::std::ostream* os) const { + *os << "is NULL"; + } +}; + +// Ref(variable) matches any argument that is a reference to +// 'variable'. This matcher is polymorphic as it can match any +// super type of the type of 'variable'. +// +// The RefMatcher template class implements Ref(variable). It can +// only be instantiated with a reference type. 
This prevents a user +// from mistakenly using Ref(x) to match a non-reference function +// argument. For example, the following will righteously cause a +// compiler error: +// +// int n; +// Matcher m1 = Ref(n); // This won't compile. +// Matcher m2 = Ref(n); // This will compile. +template +class RefMatcher; + +template +class RefMatcher { + // Google Mock is a generic framework and thus needs to support + // mocking any function types, including those that take non-const + // reference arguments. Therefore the template parameter T (and + // Super below) can be instantiated to either a const type or a + // non-const type. + public: + // RefMatcher() takes a T& instead of const T&, as we want the + // compiler to catch using Ref(const_value) as a matcher for a + // non-const reference. + explicit RefMatcher(T& x) : object_(x) {} // NOLINT + + template + operator Matcher() const { + // By passing object_ (type T&) to Impl(), which expects a Super&, + // we make sure that Super is a super type of T. In particular, + // this catches using Ref(const_value) as a matcher for a + // non-const reference, as you cannot implicitly convert a const + // reference to a non-const reference. + return MakeMatcher(new Impl(object_)); + } + + private: + template + class Impl : public MatcherInterface { + public: + explicit Impl(Super& x) : object_(x) {} // NOLINT + + // MatchAndExplain() takes a Super& (as opposed to const Super&) + // in order to match the interface MatcherInterface. 
+ virtual bool MatchAndExplain( + Super& x, MatchResultListener* listener) const { + *listener << "which is located @" << static_cast(&x); + return &x == &object_; + } + + virtual void DescribeTo(::std::ostream* os) const { + *os << "references the variable "; + UniversalPrinter::Print(object_, os); + } + + virtual void DescribeNegationTo(::std::ostream* os) const { + *os << "does not reference the variable "; + UniversalPrinter::Print(object_, os); + } + + private: + const Super& object_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; + + T& object_; + + GTEST_DISALLOW_ASSIGN_(RefMatcher); +}; + +// Polymorphic helper functions for narrow and wide string matchers. +inline bool CaseInsensitiveCStringEquals(const char* lhs, const char* rhs) { + return String::CaseInsensitiveCStringEquals(lhs, rhs); +} + +inline bool CaseInsensitiveCStringEquals(const wchar_t* lhs, + const wchar_t* rhs) { + return String::CaseInsensitiveWideCStringEquals(lhs, rhs); +} + +// String comparison for narrow or wide strings that can have embedded NUL +// characters. +template +bool CaseInsensitiveStringEquals(const StringType& s1, + const StringType& s2) { + // Are the heads equal? + if (!CaseInsensitiveCStringEquals(s1.c_str(), s2.c_str())) { + return false; + } + + // Skip the equal heads. + const typename StringType::value_type nul = 0; + const size_t i1 = s1.find(nul), i2 = s2.find(nul); + + // Are we at the end of either s1 or s2? + if (i1 == StringType::npos || i2 == StringType::npos) { + return i1 == i2; + } + + // Are the tails equal? + return CaseInsensitiveStringEquals(s1.substr(i1 + 1), s2.substr(i2 + 1)); +} + +// String matchers. + +// Implements equality-based string matchers like StrEq, StrCaseNe, and etc. 
+template +class StrEqualityMatcher { + public: + StrEqualityMatcher(const StringType& str, bool expect_eq, + bool case_sensitive) + : string_(str), expect_eq_(expect_eq), case_sensitive_(case_sensitive) {} + + // Accepts pointer types, particularly: + // const char* + // char* + // const wchar_t* + // wchar_t* + template + bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { + if (s == NULL) { + return !expect_eq_; + } + return MatchAndExplain(StringType(s), listener); + } + + // Matches anything that can convert to StringType. + // + // This is a template, not just a plain function with const StringType&, + // because StringPiece has some interfering non-explicit constructors. + template + bool MatchAndExplain(const MatcheeStringType& s, + MatchResultListener* /* listener */) const { + const StringType& s2(s); + const bool eq = case_sensitive_ ? s2 == string_ : + CaseInsensitiveStringEquals(s2, string_); + return expect_eq_ == eq; + } + + void DescribeTo(::std::ostream* os) const { + DescribeToHelper(expect_eq_, os); + } + + void DescribeNegationTo(::std::ostream* os) const { + DescribeToHelper(!expect_eq_, os); + } + + private: + void DescribeToHelper(bool expect_eq, ::std::ostream* os) const { + *os << (expect_eq ? "is " : "isn't "); + *os << "equal to "; + if (!case_sensitive_) { + *os << "(ignoring case) "; + } + UniversalPrint(string_, os); + } + + const StringType string_; + const bool expect_eq_; + const bool case_sensitive_; + + GTEST_DISALLOW_ASSIGN_(StrEqualityMatcher); +}; + +// Implements the polymorphic HasSubstr(substring) matcher, which +// can be used as a Matcher as long as T can be converted to a +// string. 
+template +class HasSubstrMatcher { + public: + explicit HasSubstrMatcher(const StringType& substring) + : substring_(substring) {} + + // Accepts pointer types, particularly: + // const char* + // char* + // const wchar_t* + // wchar_t* + template + bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { + return s != NULL && MatchAndExplain(StringType(s), listener); + } + + // Matches anything that can convert to StringType. + // + // This is a template, not just a plain function with const StringType&, + // because StringPiece has some interfering non-explicit constructors. + template + bool MatchAndExplain(const MatcheeStringType& s, + MatchResultListener* /* listener */) const { + const StringType& s2(s); + return s2.find(substring_) != StringType::npos; + } + + // Describes what this matcher matches. + void DescribeTo(::std::ostream* os) const { + *os << "has substring "; + UniversalPrint(substring_, os); + } + + void DescribeNegationTo(::std::ostream* os) const { + *os << "has no substring "; + UniversalPrint(substring_, os); + } + + private: + const StringType substring_; + + GTEST_DISALLOW_ASSIGN_(HasSubstrMatcher); +}; + +// Implements the polymorphic StartsWith(substring) matcher, which +// can be used as a Matcher as long as T can be converted to a +// string. +template +class StartsWithMatcher { + public: + explicit StartsWithMatcher(const StringType& prefix) : prefix_(prefix) { + } + + // Accepts pointer types, particularly: + // const char* + // char* + // const wchar_t* + // wchar_t* + template + bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { + return s != NULL && MatchAndExplain(StringType(s), listener); + } + + // Matches anything that can convert to StringType. + // + // This is a template, not just a plain function with const StringType&, + // because StringPiece has some interfering non-explicit constructors. 
+ template + bool MatchAndExplain(const MatcheeStringType& s, + MatchResultListener* /* listener */) const { + const StringType& s2(s); + return s2.length() >= prefix_.length() && + s2.substr(0, prefix_.length()) == prefix_; + } + + void DescribeTo(::std::ostream* os) const { + *os << "starts with "; + UniversalPrint(prefix_, os); + } + + void DescribeNegationTo(::std::ostream* os) const { + *os << "doesn't start with "; + UniversalPrint(prefix_, os); + } + + private: + const StringType prefix_; + + GTEST_DISALLOW_ASSIGN_(StartsWithMatcher); +}; + +// Implements the polymorphic EndsWith(substring) matcher, which +// can be used as a Matcher as long as T can be converted to a +// string. +template +class EndsWithMatcher { + public: + explicit EndsWithMatcher(const StringType& suffix) : suffix_(suffix) {} + + // Accepts pointer types, particularly: + // const char* + // char* + // const wchar_t* + // wchar_t* + template + bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { + return s != NULL && MatchAndExplain(StringType(s), listener); + } + + // Matches anything that can convert to StringType. + // + // This is a template, not just a plain function with const StringType&, + // because StringPiece has some interfering non-explicit constructors. 
+ template + bool MatchAndExplain(const MatcheeStringType& s, + MatchResultListener* /* listener */) const { + const StringType& s2(s); + return s2.length() >= suffix_.length() && + s2.substr(s2.length() - suffix_.length()) == suffix_; + } + + void DescribeTo(::std::ostream* os) const { + *os << "ends with "; + UniversalPrint(suffix_, os); + } + + void DescribeNegationTo(::std::ostream* os) const { + *os << "doesn't end with "; + UniversalPrint(suffix_, os); + } + + private: + const StringType suffix_; + + GTEST_DISALLOW_ASSIGN_(EndsWithMatcher); +}; + +// Implements polymorphic matchers MatchesRegex(regex) and +// ContainsRegex(regex), which can be used as a Matcher as long as +// T can be converted to a string. +class MatchesRegexMatcher { + public: + MatchesRegexMatcher(const RE* regex, bool full_match) + : regex_(regex), full_match_(full_match) {} + + // Accepts pointer types, particularly: + // const char* + // char* + // const wchar_t* + // wchar_t* + template + bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { + return s != NULL && MatchAndExplain(std::string(s), listener); + } + + // Matches anything that can convert to std::string. + // + // This is a template, not just a plain function with const std::string&, + // because StringPiece has some interfering non-explicit constructors. + template + bool MatchAndExplain(const MatcheeStringType& s, + MatchResultListener* /* listener */) const { + const std::string& s2(s); + return full_match_ ? RE::FullMatch(s2, *regex_) : + RE::PartialMatch(s2, *regex_); + } + + void DescribeTo(::std::ostream* os) const { + *os << (full_match_ ? "matches" : "contains") + << " regular expression "; + UniversalPrinter::Print(regex_->pattern(), os); + } + + void DescribeNegationTo(::std::ostream* os) const { + *os << "doesn't " << (full_match_ ? 
"match" : "contain") + << " regular expression "; + UniversalPrinter::Print(regex_->pattern(), os); + } + + private: + const internal::linked_ptr regex_; + const bool full_match_; + + GTEST_DISALLOW_ASSIGN_(MatchesRegexMatcher); +}; + +// Implements a matcher that compares the two fields of a 2-tuple +// using one of the ==, <=, <, etc, operators. The two fields being +// compared don't have to have the same type. +// +// The matcher defined here is polymorphic (for example, Eq() can be +// used to match a tuple, a tuple, +// etc). Therefore we use a template type conversion operator in the +// implementation. +template +class PairMatchBase { + public: + template + operator Matcher< ::testing::tuple >() const { + return MakeMatcher(new Impl< ::testing::tuple >); + } + template + operator Matcher&>() const { + return MakeMatcher(new Impl&>); + } + + private: + static ::std::ostream& GetDesc(::std::ostream& os) { // NOLINT + return os << D::Desc(); + } + + template + class Impl : public MatcherInterface { + public: + virtual bool MatchAndExplain( + Tuple args, + MatchResultListener* /* listener */) const { + return Op()(::testing::get<0>(args), ::testing::get<1>(args)); + } + virtual void DescribeTo(::std::ostream* os) const { + *os << "are " << GetDesc; + } + virtual void DescribeNegationTo(::std::ostream* os) const { + *os << "aren't " << GetDesc; + } + }; +}; + +class Eq2Matcher : public PairMatchBase { + public: + static const char* Desc() { return "an equal pair"; } +}; +class Ne2Matcher : public PairMatchBase { + public: + static const char* Desc() { return "an unequal pair"; } +}; +class Lt2Matcher : public PairMatchBase { + public: + static const char* Desc() { return "a pair where the first < the second"; } +}; +class Gt2Matcher : public PairMatchBase { + public: + static const char* Desc() { return "a pair where the first > the second"; } +}; +class Le2Matcher : public PairMatchBase { + public: + static const char* Desc() { return "a pair where the first <= 
the second"; } +}; +class Ge2Matcher : public PairMatchBase { + public: + static const char* Desc() { return "a pair where the first >= the second"; } +}; + +// Implements the Not(...) matcher for a particular argument type T. +// We do not nest it inside the NotMatcher class template, as that +// will prevent different instantiations of NotMatcher from sharing +// the same NotMatcherImpl class. +template +class NotMatcherImpl : public MatcherInterface { + public: + explicit NotMatcherImpl(const Matcher& matcher) + : matcher_(matcher) {} + + virtual bool MatchAndExplain(T x, MatchResultListener* listener) const { + return !matcher_.MatchAndExplain(x, listener); + } + + virtual void DescribeTo(::std::ostream* os) const { + matcher_.DescribeNegationTo(os); + } + + virtual void DescribeNegationTo(::std::ostream* os) const { + matcher_.DescribeTo(os); + } + + private: + const Matcher matcher_; + + GTEST_DISALLOW_ASSIGN_(NotMatcherImpl); +}; + +// Implements the Not(m) matcher, which matches a value that doesn't +// match matcher m. +template +class NotMatcher { + public: + explicit NotMatcher(InnerMatcher matcher) : matcher_(matcher) {} + + // This template type conversion operator allows Not(m) to be used + // to match any type m can match. + template + operator Matcher() const { + return Matcher(new NotMatcherImpl(SafeMatcherCast(matcher_))); + } + + private: + InnerMatcher matcher_; + + GTEST_DISALLOW_ASSIGN_(NotMatcher); +}; + +// Implements the AllOf(m1, m2) matcher for a particular argument type +// T. We do not nest it inside the BothOfMatcher class template, as +// that will prevent different instantiations of BothOfMatcher from +// sharing the same BothOfMatcherImpl class. 
+template +class BothOfMatcherImpl : public MatcherInterface { + public: + BothOfMatcherImpl(const Matcher& matcher1, const Matcher& matcher2) + : matcher1_(matcher1), matcher2_(matcher2) {} + + virtual void DescribeTo(::std::ostream* os) const { + *os << "("; + matcher1_.DescribeTo(os); + *os << ") and ("; + matcher2_.DescribeTo(os); + *os << ")"; + } + + virtual void DescribeNegationTo(::std::ostream* os) const { + *os << "("; + matcher1_.DescribeNegationTo(os); + *os << ") or ("; + matcher2_.DescribeNegationTo(os); + *os << ")"; + } + + virtual bool MatchAndExplain(T x, MatchResultListener* listener) const { + // If either matcher1_ or matcher2_ doesn't match x, we only need + // to explain why one of them fails. + StringMatchResultListener listener1; + if (!matcher1_.MatchAndExplain(x, &listener1)) { + *listener << listener1.str(); + return false; + } + + StringMatchResultListener listener2; + if (!matcher2_.MatchAndExplain(x, &listener2)) { + *listener << listener2.str(); + return false; + } + + // Otherwise we need to explain why *both* of them match. + const std::string s1 = listener1.str(); + const std::string s2 = listener2.str(); + + if (s1 == "") { + *listener << s2; + } else { + *listener << s1; + if (s2 != "") { + *listener << ", and " << s2; + } + } + return true; + } + + private: + const Matcher matcher1_; + const Matcher matcher2_; + + GTEST_DISALLOW_ASSIGN_(BothOfMatcherImpl); +}; + +#if GTEST_LANG_CXX11 +// MatcherList provides mechanisms for storing a variable number of matchers in +// a list structure (ListType) and creating a combining matcher from such a +// list. +// The template is defined recursively using the following template parameters: +// * kSize is the length of the MatcherList. +// * Head is the type of the first matcher of the list. +// * Tail denotes the types of the remaining matchers of the list.
+template +struct MatcherList { + typedef MatcherList MatcherListTail; + typedef ::std::pair ListType; + + // BuildList stores variadic type values in a nested pair structure. + // Example: + // MatcherList<3, int, string, float>::BuildList(5, "foo", 2.0) will return + // the corresponding result of type pair>. + static ListType BuildList(const Head& matcher, const Tail&... tail) { + return ListType(matcher, MatcherListTail::BuildList(tail...)); + } + + // CreateMatcher creates a Matcher from a given list of matchers (built + // by BuildList()). CombiningMatcher is used to combine the matchers of the + // list. CombiningMatcher must implement MatcherInterface and have a + // constructor taking two Matchers as input. + template class CombiningMatcher> + static Matcher CreateMatcher(const ListType& matchers) { + return Matcher(new CombiningMatcher( + SafeMatcherCast(matchers.first), + MatcherListTail::template CreateMatcher( + matchers.second))); + } +}; + +// The following defines the base case for the recursive definition of +// MatcherList. +template +struct MatcherList<2, Matcher1, Matcher2> { + typedef ::std::pair ListType; + + static ListType BuildList(const Matcher1& matcher1, + const Matcher2& matcher2) { + return ::std::pair(matcher1, matcher2); + } + + template class CombiningMatcher> + static Matcher CreateMatcher(const ListType& matchers) { + return Matcher(new CombiningMatcher( + SafeMatcherCast(matchers.first), + SafeMatcherCast(matchers.second))); + } +}; + +// VariadicMatcher is used for the variadic implementation of +// AllOf(m_1, m_2, ...) and AnyOf(m_1, m_2, ...). +// CombiningMatcher is used to recursively combine the provided matchers +// (of type Args...). +template