v4.3 update. (#2709)
* v4.3 update. * Update the cute_dsl_api changelog's doc link * Update version to 4.3.0 * Update the example link * Update doc to encourage user to install DSL from requirements.txt --------- Co-authored-by: Larry Wu <larwu@nvidia.com>
This commit is contained in:
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -43,7 +43,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -69,24 +69,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor(raw_ptr(0x000000000736b0c0: f32, generic, align<4>) o (8,5):(5,1), data=\n",
|
||||
" [[ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, ],\n",
|
||||
" [ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, ],\n",
|
||||
" [ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, ],\n",
|
||||
" ...\n",
|
||||
" [ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, ],\n",
|
||||
" [ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, ],\n",
|
||||
" [ 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, ]])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"\n",
|
||||
@ -115,12 +100,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from cutlass.cute.runtime import from_dlpack\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@cute.jit\n",
|
||||
"def print_tensor_dlpack(src: cute.Tensor):\n",
|
||||
" print(src)\n",
|
||||
@ -129,25 +115,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor<ptr<f32, generic> o (8,5):(5,1)>\n",
|
||||
"tensor(raw_ptr(0x0000000007559340: f32, generic, align<4>) o (8,5):(5,1), data=\n",
|
||||
" [[-1.151769, 1.019397, -0.371175, -0.717776, 0.502176, ],\n",
|
||||
" [ 0.114282, 0.900084, 0.320770, 1.564574, -0.632329, ],\n",
|
||||
" [-0.570140, 0.178112, -0.423079, 1.936198, 0.003355, ],\n",
|
||||
" ...\n",
|
||||
" [-2.425393, -0.275528, 1.267157, -0.811101, -0.985456, ],\n",
|
||||
" [ 0.777889, -2.114074, 0.357184, -0.321312, -0.938138, ],\n",
|
||||
" [ 1.959564, 1.797602, 0.116901, 0.306198, -1.837295, ]])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a = torch.randn(8, 5, dtype=torch_dtype(cutlass.Float32))\n",
|
||||
"\n",
|
||||
@ -156,25 +126,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor<ptr<f32, generic> o (8,8):(8,1)>\n",
|
||||
"tensor(raw_ptr(0x0000000007979da0: f32, generic, align<4>) o (8,8):(8,1), data=\n",
|
||||
" [[ 0.122739, -0.605744, -1.442022, ..., -0.356501, -0.993329, -0.091110, ],\n",
|
||||
" [ 0.278448, 0.318482, -0.276867, ..., 1.542181, -1.701539, -0.309454, ],\n",
|
||||
" [ 0.563565, -0.753936, 0.131214, ..., 0.437912, -0.482277, -0.051540, ],\n",
|
||||
" ...\n",
|
||||
" [-1.974096, -0.177881, 0.426807, ..., -1.579115, -0.304974, 0.451164, ],\n",
|
||||
" [ 0.149851, -0.704689, -0.295063, ..., -0.653001, 0.008871, 0.903916, ],\n",
|
||||
" [ 1.188619, 1.519662, 1.270734, ..., 0.404082, 0.173200, 0.093476, ]])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
@ -211,39 +165,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"a[2] = 10.000000 (equivalent to a[(2,0)])\n",
|
||||
"a[9] = 6.000000 (equivalent to a[(1,1)])\n",
|
||||
"a[2,0] = 10.000000\n",
|
||||
"a[2,4] = 14.000000\n",
|
||||
"a[(2,4)] = 14.000000\n",
|
||||
"a[2,3] = 100.000000\n",
|
||||
"a[(2,4)] = 101.000000\n",
|
||||
"tensor([[ 0., 1., 2., 3., 4.],\n",
|
||||
" [ 5., 6., 7., 8., 9.],\n",
|
||||
" [ 10., 11., 12., 100., 101.],\n",
|
||||
" [ 15., 16., 17., 18., 19.],\n",
|
||||
" [ 20., 21., 22., 23., 24.],\n",
|
||||
" [ 25., 26., 27., 28., 29.],\n",
|
||||
" [ 30., 31., 32., 33., 34.],\n",
|
||||
" [ 35., 36., 37., 38., 39.]])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@cute.jit\n",
|
||||
"def tensor_access_item(a: cute.Tensor):\n",
|
||||
" # access data using linear index\n",
|
||||
" cute.printf(\"a[2] = {} (equivalent to a[{}])\", a[2],\n",
|
||||
" cute.make_identity_tensor(a.layout.shape)[2])\n",
|
||||
" cute.printf(\"a[9] = {} (equivalent to a[{}])\", a[9],\n",
|
||||
" cute.make_identity_tensor(a.layout.shape)[9])\n",
|
||||
" cute.printf(\n",
|
||||
" \"a[2] = {} (equivalent to a[{}])\",\n",
|
||||
" a[2],\n",
|
||||
" cute.make_identity_tensor(a.layout.shape)[2],\n",
|
||||
" )\n",
|
||||
" cute.printf(\n",
|
||||
" \"a[9] = {} (equivalent to a[{}])\",\n",
|
||||
" a[9],\n",
|
||||
" cute.make_identity_tensor(a.layout.shape)[9],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # access data using n-d coordinates, following two are equivalent\n",
|
||||
" cute.printf(\"a[2,0] = {}\", a[2, 0])\n",
|
||||
@ -251,14 +189,14 @@
|
||||
" cute.printf(\"a[(2,4)] = {}\", a[2, 4])\n",
|
||||
"\n",
|
||||
" # assign values to tensor@(2,3) and tensor@(2,4)\n",
|
||||
" a[2,3] = 100.0\n",
|
||||
" a[2,4] = 101.0\n",
|
||||
" cute.printf(\"a[2,3] = {}\", a[2,3])\n",
|
||||
" cute.printf(\"a[(2,4)] = {}\", a[(2,4)])\n",
|
||||
" a[2, 3] = 100.0\n",
|
||||
" a[2, 4] = 101.0\n",
|
||||
" cute.printf(\"a[2,3] = {}\", a[2, 3])\n",
|
||||
" cute.printf(\"a[(2,4)] = {}\", a[(2, 4)])\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Create a tensor with sequential data using torch\n",
|
||||
"data = torch.arange(0, 8*5, dtype=torch.float32).reshape(8, 5)\n",
|
||||
"data = torch.arange(0, 8 * 5, dtype=torch.float32).reshape(8, 5)\n",
|
||||
"tensor_access_item(from_dlpack(data))\n",
|
||||
"\n",
|
||||
"print(data)"
|
||||
@ -287,14 +225,17 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Coordinate Tensor\n",
|
||||
"## Coordinate Tensors\n",
|
||||
"\n",
|
||||
"A coordinate tensor is a special type of tensor that maps coordinates to coordinates rather than to values. \n",
|
||||
"The key distinction is that while regular tensors map coordinates to some value type (like numbers), \n",
|
||||
"coordinate tensors map coordinates to other coordinates.\n",
|
||||
"### Definition and Properties\n",
|
||||
"\n",
|
||||
"For example, given a shape (4,4), a coordinate tensor using row-major layout would appear as:\n",
|
||||
"A coordinate tensor $T: \\mathbb{Z}^n \\to \\mathbb{Z}^m$ is a mathematical structure that establishes a mapping between coordinate spaces. Unlike standard tensors that map coordinates to scalar values, coordinate tensors map coordinates to other coordinates, forming a fundamental building block for tensor operations and transformations.\n",
|
||||
"\n",
|
||||
"### Examples\n",
|
||||
"\n",
|
||||
"Consider a `(4,4)` coordinate tensor:\n",
|
||||
"\n",
|
||||
"**Row-Major Layout (C-style):**\n",
|
||||
"\\begin{bmatrix} \n",
|
||||
"(0,0) & (0,1) & (0,2) & (0,3) \\\\\n",
|
||||
"(1,0) & (1,1) & (1,2) & (1,3) \\\\\n",
|
||||
@ -302,8 +243,7 @@
|
||||
"(3,0) & (3,1) & (3,2) & (3,3)\n",
|
||||
"\\end{bmatrix}\n",
|
||||
"\n",
|
||||
"The same shape with a column-major layout would appear as:\n",
|
||||
"\n",
|
||||
"**Column-Major Layout (Fortran-style):**\n",
|
||||
"\\begin{bmatrix}\n",
|
||||
"(0,0) & (1,0) & (2,0) & (3,0) \\\\\n",
|
||||
"(0,1) & (1,1) & (2,1) & (3,1) \\\\\n",
|
||||
@ -311,40 +251,50 @@
|
||||
"(0,3) & (1,3) & (2,3) & (3,3)\n",
|
||||
"\\end{bmatrix}\n",
|
||||
"\n",
|
||||
"The key points about coordinate tensors are:\n",
|
||||
"- Each element in the tensor is itself a coordinate tuple (i,j) rather than a scalar value\n",
|
||||
"- The coordinates map to themselves - so position (1,2) contains the coordinate (1,2)\n",
|
||||
"- The layout (row-major vs column-major) determines how these coordinate tuples are arranged in memory\n",
|
||||
"### Identity Tensor\n",
|
||||
"\n",
|
||||
"For example, coordinate tensors can be created using the `make_identity_tensor` utility:\n",
|
||||
"An identity tensor $I$ is a special case of a coordinate tensor that implements the identity mapping function:\n",
|
||||
"\n",
|
||||
"**Definition:**\n",
|
||||
"For a given shape $S = (s_1, s_2, \\ldots, s_n)$, the identity tensor $I$ satisfies $I(c) = c$ for every integer coordinate $c \\in \\prod_{i=1}^n [0, s_i)$.\n",
|
||||
"\n",
|
||||
"**Properties:**\n",
|
||||
"1. **Bijective Mapping**: The identity tensor establishes a one-to-one correspondence between coordinates.\n",
|
||||
"2. **Layout Invariance**: The logical structure remains constant regardless of the underlying memory layout.\n",
|
||||
"3. **Coordinate Preservation**: For any coordinate $c$, $I(c) = c$.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CuTe establishes an isomorphism between 1-D indices and N-D coordinates through colexicographical ordering (the leftmost mode varies fastest). For a coordinate $c = (c_1, c_2, \\ldots, c_n)$ in an identity tensor with shape $S = (s_1, s_2, \\ldots, s_n)$:\n",
|
||||
"\n",
|
||||
"**Linear Index Formula:**\n",
|
||||
"$\\text{idx} = c_1 + \\sum_{i=2}^{n} \\left(c_i \\prod_{j=1}^{i-1} s_j\\right)$\n",
|
||||
"\n",
|
||||
"**Example:**\n",
|
||||
"```python\n",
|
||||
"# Create an identity tensor from a given shape\n",
|
||||
"coord_tensor = cute.make_identity_tensor(layout.shape)\n",
|
||||
"\n",
|
||||
"# Access coordinate using linear index\n",
|
||||
"coord = coord_tensor[linear_idx] # Returns the N-D coordinate\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This creates a tensor that maps each coordinate to itself, providing a reference point for understanding how other layouts transform these coordinates."
|
||||
"This bidirectional mapping enables efficient conversion between linear indices and N-dimensional coordinates, which simplifies tensor operations and reasoning about memory access patterns."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tensor<(0,0) o (8,4):(1@0,1@1)>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@cute.jit\n",
|
||||
"def print_tensor_coord(a: cute.Tensor):\n",
|
||||
" coord_tensor = cute.make_identity_tensor(a.layout.shape)\n",
|
||||
" print(coord_tensor)\n",
|
||||
" cute.print_tensor(coord_tensor)\n",
|
||||
"\n",
|
||||
"a = torch.randn(8,4, dtype=torch_dtype(cutlass.Float32))\n",
|
||||
"\n",
|
||||
"a = torch.randn(8, 4, dtype=torch_dtype(cutlass.Float32))\n",
|
||||
"print_tensor_coord(from_dlpack(a))"
|
||||
]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user