@ -0,0 +1,274 @@
|
||||
conv2d fprop_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1767700736 2104699940 3506659864 557648934
|
||||
conv2d fprop_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1539314507 3971227455 1976927351 1642148785
|
||||
conv2d fprop_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 276489656 653235219 3147305346 880610205
|
||||
conv2d fprop_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 272457724 2178229139 2786201726 4170295839
|
||||
conv2d fprop_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 242235041 2149454506 784935854 682531065
|
||||
conv2d fprop_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 215903418 3478189705 1667216236 1437761176
|
||||
conv2d fprop_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 215903418 379326961 1780379994 3740415776
|
||||
conv2d fprop_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 215903418 924848818 3533854396 2683779476
|
||||
conv2d fprop_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2870331951 359232443 2147867990 1653277018
|
||||
conv2d fprop_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2870331951 3784314846 2644315999 4224154526
|
||||
conv2d fprop_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3787448414 3562991793 535073859 2563373454
|
||||
conv2d fprop_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 426169840 2464808416 864648234 461884698
|
||||
conv2d fprop_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2564934525 3910792915 3577331017 827498183
|
||||
conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 28479234 867695528 1947311971 83328334
|
||||
conv2d fprop_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4192922822 4244595864 2296602326 2349214706
|
||||
conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 274678245 3464152269 1682550229 3446204619
|
||||
conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3993280136 828543035 1319748516 956044554
|
||||
conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 832003025 3799813757 4030292245 457791957
|
||||
conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1444316594 4129865888 93616503 412257611
|
||||
conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2931873718 1841508064 1497852219 36703874
|
||||
conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2931873718 1841508064 1497852219 1842147148
|
||||
conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1612565294 109894479 1782187316 3370789453
|
||||
conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 841569299 1010785577 1158956167 3261208135
|
||||
conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1893352157 48149942 3544807462 446577726
|
||||
conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 fnhwc_fnhwc_fnhwc_f_f 3585320147 2150950452 1625817025 3964129474
|
||||
conv2d dgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 289918791 2624928614 3423533117 3186342135
|
||||
conv2d dgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672413387 2732296888 1838622641 4203745561
|
||||
conv2d dgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2754803027 3456572634 893492926 1966259884
|
||||
conv2d dgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 671982235 4014726279 4027869577 1510990157
|
||||
conv2d dgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 798317794 4140605332 3580988556 3425909428
|
||||
conv2d dgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1721270411 2106553169 835800311 3417471222
|
||||
conv2d dgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2756413475 860217059 166776702 1109666471
|
||||
conv2d dgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2128738105 855244826 2670006594 3857976152
|
||||
conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1931093565 3079461262 3579256638 2926210806
|
||||
conv2d dgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2472246681 2952423142 2045838875 3445165841
|
||||
conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2956871200 2133381336 2601441527 2035094220
|
||||
conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 365467186 1700915522 2515933441 406719240
|
||||
conv2d dgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_h_h 3347784734 156533442 1012781676 688128904
|
||||
conv2d dgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 927718585 3117803557 1370701307 1462167731
|
||||
conv2d dgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4110991321 973422497 1926250028 3440543762
|
||||
conv2d dgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 832653836 2892862516 3649300762 1521470286
|
||||
conv2d dgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2075083065 3181416651 1733426984 872275640
|
||||
conv2d dgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4005590448 1639170045 388151578 4186957447
|
||||
conv2d dgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 181075276 1433744686 860506550 3475157408
|
||||
conv2d dgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1513864544 1747719409 877465841 2345541783
|
||||
conv2d dgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 856324887 2307248012 337386755 3363072703
|
||||
conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1906605830 722034901 2562804622 2508759317
|
||||
conv2d dgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 805717279 2196645331 3235235362 1518334120
|
||||
conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3168796339 72559978 778918419 1260968000
|
||||
conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 261954979 2634885882 451986822 3792829599
|
||||
conv2d dgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 3747142491 2426759809 2622222681 371723930
|
||||
conv2d dgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2056905385 3612826298 2531545294 476754549
|
||||
conv2d dgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4120764770 2391975923 197605094 3409942185
|
||||
conv2d dgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1972540475 3071904063 408984565 2378809888
|
||||
conv2d dgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3414629540 3067676760 1540919649 2008865071
|
||||
conv2d dgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4100326666 1085505037 2778215386 230227569
|
||||
conv2d dgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3662895757 2731079464 3570839563 3483629877
|
||||
conv2d dgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2044596379 408419601 3415600242 2106927195
|
||||
conv2d dgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2154102133 3606099389 4034802752 3200055633
|
||||
conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2609259399 3910244699 1319285699 2229775542
|
||||
conv2d dgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2948772873 2780071616 2703730845 3090625734
|
||||
conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 752289976 4278696824 360883914 3802692600
|
||||
conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3723912751 653419877 359675571 283806385
|
||||
conv2d dgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 fnhwc_fnhwc_fnhwc_f_f 2027599472 1075980921 3101013494 2025203940
|
||||
conv2d fprop_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 991402150 1393431534 1148212814 1350914659
|
||||
conv2d fprop_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4208297221 4283492776 419570292 1210341563
|
||||
conv2d fprop_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4178596783 3828059710 2735749436 2671012171
|
||||
conv2d fprop_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 924522595 563724475 3750778972 4152580670
|
||||
conv2d fprop_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1021044158 1686067905 3765040166 4102272733
|
||||
conv2d fprop_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3335547 2674994719 635224486 2759329777
|
||||
conv2d fprop_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3335547 4201252830 2920298728 304256151
|
||||
conv2d fprop_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3335547 70289262 646435722 4137562540
|
||||
conv2d fprop_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1317457392 1288095320 2132879813 656196754
|
||||
conv2d fprop_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1317457392 2202157489 2326567490 2475188414
|
||||
conv2d fprop_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2476454437 1857118302 4164386062 239840568
|
||||
conv2d fprop_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2767650699 3514840131 590439733 3879821123
|
||||
conv2d fprop_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3896287283 3112762669 2515107934 2106635937
|
||||
conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1903067870 1021832870 3003938078 2751931686
|
||||
conv2d fprop_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3489785028 2466126497 1374078692 2737628040
|
||||
conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2051350923 263676708 3639860119 1370886256
|
||||
conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 719099834 1474713672 204857540 2768940347
|
||||
conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3441724486 3162593831 421721594 3097845598
|
||||
conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2034354027 1249407570 2567025479 1441082595
|
||||
conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 941893937 3608468045 635631428 2369653089
|
||||
conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 941893937 3608468045 635631428 1218705038
|
||||
conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 172579142 319546523 718795680 1453661415
|
||||
conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2823351660 1326352711 1110204809 1155441703
|
||||
conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3238446487 2572503545 686287700 1559476701
|
||||
conv2d fprop_1x8x8x1_4x4_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 991402150 1883874274 1180207512 3934800419
|
||||
conv2d fprop_1x16x16x1_8x8_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 24290453 4230587034 4117433929 2540623821
|
||||
conv2d fprop_1x16x16x1_12x12_16x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 24290453 3802993432 1563447158 515257167
|
||||
conv2d fprop_1x224x224x1_220x220_32x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 7656882 2583340103 3928463259 1564251818
|
||||
conv2d fprop_1x224x224x1_110x110_64x7x7_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 7656882 2966178620 3457283045 1726663817
|
||||
conv2d fprop_1x224x224x1_222x222_64x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 7656882 1794561978 3101289788 3492498648
|
||||
conv2d fprop_1x224x224x1_111x111_64x5x5_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 7656882 1794561978 498358130 4111289929
|
||||
conv2d fprop_1x8x8x2_4x4_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2693144988 3876248534 3038023830 1910263513
|
||||
conv2d fprop_1x16x16x2_8x8_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4208297221 3355193355 319259163 535683577
|
||||
conv2d fprop_1x16x16x2_12x12_16x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4208297221 1548147432 3385829172 2741952709
|
||||
conv2d fprop_1x224x224x2_220x220_32x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3479872296 2686562907 3948710179 3669872932
|
||||
conv2d fprop_1x224x224x2_110x110_64x7x7_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3479872296 576815792 2317227037 1211532666
|
||||
conv2d fprop_1x224x224x2_222x222_64x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3479872296 27596985 555460201 895685163
|
||||
conv2d fprop_1x224x224x2_111x111_64x5x5_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3479872296 27596985 1465341652 2228916523
|
||||
conv2d fprop_1x8x8x4_4x4_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 24290453 137535877 1436667267 1395660627
|
||||
conv2d fprop_1x224x224x4_220x220_32x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2495921302 2226159049 4051661898 209529384
|
||||
conv2d fprop_1x224x224x4_110x110_64x7x7_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2495921302 3541851870 2271016226 2671623385
|
||||
conv2d fprop_1x224x224x4_222x222_64x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2495921302 982184919 2007343215 3362992769
|
||||
conv2d fprop_1x224x224x4_111x111_64x5x5_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2495921302 982184919 20610297 1086800078
|
||||
conv2d fprop_1x8x8x8_4x4_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4208297221 3117444553 1497663382 3561001103
|
||||
conv2d fprop_1x224x224x8_220x220_32x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3188907679 1414143072 827338392 2827855918
|
||||
conv2d fprop_1x224x224x8_110x110_64x7x7_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3188907679 3886996022 26545788 3407771964
|
||||
conv2d fprop_1x224x224x8_222x222_64x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3188907679 380272816 2374613655 3601677176
|
||||
conv2d fprop_1x224x224x8_111x111_64x5x5_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3188907679 380272816 778374730 2110111988
|
||||
conv2d fprop_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1736512560 49406874 846358010 3314905564
|
||||
conv2d fprop_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1848484956 1432417472 1903569827 3750799351
|
||||
conv2d fprop_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4236427320 3696009469 69852620 201921851
|
||||
conv2d fprop_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 109006944 450017448 1793784844 903209915
|
||||
conv2d fprop_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 813367872 2397796503 1928191746 3210229460
|
||||
conv2d fprop_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1348284291 1307184141 46021356 1674017987
|
||||
conv2d fprop_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1348284291 1212511562 3331767121 2446286369
|
||||
conv2d fprop_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1348284291 2013675943 1681111033 1469213228
|
||||
conv2d fprop_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1703349794 500298386 3218034344 4159283207
|
||||
conv2d fprop_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1703349794 1123534155 145385311 4273847179
|
||||
conv2d fprop_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3862659311 349459322 1503631520 1404971956
|
||||
conv2d fprop_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1623686755 961217371 552550209 3980749384
|
||||
conv2d fprop_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3554927580 1131648083 4149599295 3119557776
|
||||
conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1767639287 3350675774 128324027 1059816532
|
||||
conv2d fprop_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3986143536 17411088 40173029 1694092310
|
||||
conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1157793540 3513299281 48848814 1435528367
|
||||
conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 988962069 4292634763 388976034 2674929544
|
||||
conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4202383208 3529769234 1046186503 3368902675
|
||||
conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 856448884 3057259762 2063087558 1995545427
|
||||
conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2281940872 144496548 2455451862 400986166
|
||||
conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2281940872 144496548 2455451862 1082696406
|
||||
conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2702905851 1992889713 731289041 608504198
|
||||
conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2742293143 4197915274 606840 3671124731
|
||||
conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 149434841 2288560511 2994968424 2881838300
|
||||
conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_h_h 2226824643 327135318 3718671210 2121176659
|
||||
conv2d fprop_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672413387 3254575292 1119957081 672831271
|
||||
conv2d fprop_1x4x4x14_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3115523958 3622905002 4020453928 3853387318
|
||||
conv2d fprop_1x23x56x98_10x22_128x3x3_pad_h4w5_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1702870033 1876930844 1190400523 3937287850
|
||||
conv2d fprop_1x4x4x28_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2754803027 2587856937 2021107274 2789519899
|
||||
conv2d fprop_1x23x56x100_10x22_128x3x3_pad_h4w5_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2368669977 1353376771 744357395 786349633
|
||||
conv2d fprop_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 991402150 1393431534 2496492611 3901723984
|
||||
conv2d fprop_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4208297221 4283492776 3148637036 258220505
|
||||
conv2d fprop_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4178596783 3828059710 281106520 1103939403
|
||||
conv2d fprop_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 924522595 563724475 1938163814 2197809394
|
||||
conv2d fprop_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1021044158 1686067905 350851834 3999808950
|
||||
conv2d fprop_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3335547 2674994719 1034822169 1611033520
|
||||
conv2d fprop_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3335547 4201252830 1597212204 2181492560
|
||||
conv2d fprop_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3335547 70289262 3001492060 1379239000
|
||||
conv2d fprop_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1317457392 1288095320 4211138051 2804617605
|
||||
conv2d fprop_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1317457392 2202157489 1043108884 2923122465
|
||||
conv2d fprop_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2476454437 1857118302 3877008798 1206012078
|
||||
conv2d fprop_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2767650699 3514840131 2946529611 3907056932
|
||||
conv2d fprop_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3896287283 3112762669 1581171257 3959460786
|
||||
conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1903067870 1021832870 1926804094 1756790353
|
||||
conv2d fprop_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3489785028 2466126497 1712378956 434322965
|
||||
conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2051350923 263676708 355203300 821870356
|
||||
conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 719099834 1474713672 2886387159 4086314983
|
||||
conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3441724486 3162593831 1422796372 2049419539
|
||||
conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2034354027 1249407570 1196036582 2684312264
|
||||
conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 941893937 3608468045 2198911423 1060050551
|
||||
conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 941893937 3608468045 2198911423 3361618746
|
||||
conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 172579142 319546523 2332616929 543467298
|
||||
conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2823351660 1326352711 3839068434 65031397
|
||||
conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3238446487 2572503545 3604065639 2111204111
|
||||
conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 2149247508 1775375365 2663631601 1249487679
|
||||
conv2d fprop_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4120764770 403997062 1679063623 4062928786
|
||||
conv2d dgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4110991321 3464637181 1623218578 436154205
|
||||
conv2d dgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4110991321 1479940693 3253144559 3883419107
|
||||
conv2d dgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 832653836 1871463331 2425320272 74566211
|
||||
conv2d dgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3484040069 664160900 3610888033 22347127
|
||||
conv2d dgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1513864544 1924855848 1382111427 2541177413
|
||||
conv2d dgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 868180534 1764715518 3070473696 2392864704
|
||||
conv2d dgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3437976747 666906244 3401957738 2050602745
|
||||
conv2d dgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4195072693 1575210381 781892324 2848949054
|
||||
conv2d dgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3457330201 2316839359 1539389419 4293781748
|
||||
conv2d dgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 754609939 2469024119 2885305868 2693098375
|
||||
conv2d dgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 754609939 2469024119 2885305868 1969608051
|
||||
conv2d dgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1690216859 554790212 2885143346 780489333
|
||||
conv2d dgrad_1x56x56x8_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3184127693 835105643 3337423971 3866137775
|
||||
conv2d dgrad_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2956180805 1092015789 3160693693 1526395881
|
||||
conv2d dgrad_1x56x56x12_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3184127693 1941683430 2236679600 3168985259
|
||||
conv2d dgrad_1x55x55x12_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3184127693 1941683430 3784328837 471971363
|
||||
conv2d wgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 289918791 1266976707 942688231 3457364823
|
||||
conv2d wgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672413387 1027662440 2005082293 2235558527
|
||||
conv2d wgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2754803027 3380032042 1370040310 1348846927
|
||||
conv2d wgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 671982235 1423304149 2107662762 1234913781
|
||||
conv2d wgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 798317794 1709026638 2421185623 3308071321
|
||||
conv2d wgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1721270411 2519327328 2541413264 3185574975
|
||||
conv2d wgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2756413475 2070174510 1364436192 3531942595
|
||||
conv2d wgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2128738105 2056902987 3079166829 2329433528
|
||||
conv2d wgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672413387 3857917762 3227877956 645422556
|
||||
conv2d wgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672413387 3857917762 3817218800 985231315
|
||||
conv2d wgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2754803027 1398036015 3630062764 2492522537
|
||||
conv2d wgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2784049299 643733019 3649549642 2637869234
|
||||
conv2d wgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2756413475 2332160299 302086821 3303132343
|
||||
conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1931093565 2458714707 2919710256 2311575036
|
||||
conv2d wgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2472246681 2260022344 500095455 2760458995
|
||||
conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1530672622 3635363851 2402907878 4131497953
|
||||
conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1500864134 2536338700 2459524764 2504484273
|
||||
conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3344871528 2667385029 2714805835 3487838445
|
||||
conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 966721255 1547169349 3198573835 302049294
|
||||
conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2643693957 2440004820 1576818970 1317923157
|
||||
conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2643693957 2440004820 1576818970 3186679687
|
||||
conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4028893260 4220759192 2236533218 3731336532
|
||||
conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2956871200 1591352238 1756650151 1262787222
|
||||
conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 365467186 892422645 1334708242 1372556938
|
||||
conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_h_h 3347784734 150035460 2897171548 3701081496
|
||||
conv2d wgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 927718585 4106152802 2634710231 744755886
|
||||
conv2d wgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4110991321 3464637181 2709881923 2407415563
|
||||
conv2d wgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 832653836 3723472741 3733128758 3129111191
|
||||
conv2d wgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2075083065 2042513140 253288229 404121198
|
||||
conv2d wgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4005590448 1116254439 525487530 3284739065
|
||||
conv2d wgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 181075276 1743485155 91136873 2508716910
|
||||
conv2d wgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1513864544 386662952 1127709182 4026285141
|
||||
conv2d wgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 856324887 3954249564 2591894666 2655687700
|
||||
conv2d wgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4110991321 1300426008 1263618595 1313664339
|
||||
conv2d wgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4110991321 1300426008 1756414462 2995557277
|
||||
conv2d wgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 832653836 447261065 121940906 1497499264
|
||||
conv2d wgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3484040069 2966693627 1423016429 341928547
|
||||
conv2d wgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1513864544 1759979610 2761559427 68093525
|
||||
conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1906605830 2980501720 1650970502 3258883197
|
||||
conv2d wgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 805717279 3502822733 3985958544 2568949300
|
||||
conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 868180534 3289288595 385631111 328914986
|
||||
conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3437976747 3391080565 1513955316 1521294163
|
||||
conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4195072693 1669352457 2608107448 4284090805
|
||||
conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3457330201 1126870455 106232038 3054809396
|
||||
conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 754609939 1723074453 1186911503 4239438967
|
||||
conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 754609939 1723074453 1186911503 2113601884
|
||||
conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1690216859 2413490039 36034283 1112346965
|
||||
conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3168796339 1601750164 14375779 2894970748
|
||||
conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 261954979 1300976652 4259930640 305685205
|
||||
conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 3747142491 1747587481 4137156526 1174257270
|
||||
conv2d wgrad_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2956180805 1086820986 1644914756 2013471312
|
||||
conv2d wgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2056905385 447674669 724481645 1457430910
|
||||
conv2d wgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4120764770 1227883689 3401425854 3897766524
|
||||
conv2d wgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1972540475 3749787834 3350064812 1136116240
|
||||
conv2d wgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3414629540 820341033 770836461 2451581199
|
||||
conv2d wgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4100326666 2581696511 1088458082 1521190911
|
||||
conv2d wgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3662895757 2885454895 935600441 2615245898
|
||||
conv2d wgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2044596379 3831334389 3506139121 814982501
|
||||
conv2d wgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2154102133 737968461 1291834254 2665225480
|
||||
conv2d wgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4120764770 3573498719 1809195644 1765637461
|
||||
conv2d wgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4120764770 3573498719 3379808294 483095299
|
||||
conv2d wgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1972540475 4194153035 2863868771 1639389008
|
||||
conv2d wgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2624318208 157618421 1779474147 814087242
|
||||
conv2d wgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2044596379 2300180628 423968553 3890279569
|
||||
conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2609259399 1848932917 522753581 1926508271
|
||||
conv2d wgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2948772873 3663040534 4014266327 1288646188
|
||||
conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3271403719 1585195072 1487505772 3253374264
|
||||
conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1419588777 451194147 3578359696 3659768981
|
||||
conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 763924990 2780826684 2883769406 148530958
|
||||
conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2578426561 3849874822 102765469 1305171059
|
||||
conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 740110603 1995451256 2632815435 1516344656
|
||||
conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 740110603 1995451256 2632815435 1586331550
|
||||
conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2462511240 2274021368 1188866747 3178890497
|
||||
conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 752289976 1226457131 4187777346 1400559240
|
||||
conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3723912751 1585959358 3731079159 1498901684
|
||||
conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 fnhwc_fnhwc_fnhwc_f_f 2027599472 2758666204 3287095476 4291916486
|
||||
conv2d wgrad_1x8x8x1_8x8_1x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1767700736 4278264698 2331753571 2554564568
|
||||
conv2d dgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 927718585 3117803557 1370701307 1462167731
|
||||
conv2d dgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4110991321 973422497 1926250028 3440543762
|
||||
conv2d dgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 832653836 2892862516 3649300762 1521470286
|
||||
conv2d dgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2075083065 3181416651 1733426984 872275640
|
||||
conv2d dgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4005590448 1639170045 388151578 4186957447
|
||||
conv2d dgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 181075276 1433744686 860506550 3475157408
|
||||
conv2d dgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1513864544 1747719409 877465841 2345541783
|
||||
conv2d dgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 856324887 2307248012 337386755 3363072703
|
||||
conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1906605830 722034901 2562804622 2508759317
|
||||
conv2d dgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 805717279 2196645331 3235235362 1518334120
|
||||
conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3168796339 72559978 778918419 1260968000
|
||||
conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 261954979 2634885882 451986822 3792829599
|
||||
conv2d dgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 3747142491 2426759809 2622222681 371723930
|
||||
@ -0,0 +1,187 @@
|
||||
# test/unit/conv/device/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
|
||||
from pycutlass.conv2d_operation import *
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
|
||||
class Conv2dDgradImplicitGemmF16nhwcF16nhwcF16nhwcTensorOpF16SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Dgrad_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float16, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Dgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float16, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Dgrad_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_align4(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float16, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 12),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 12),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, problem_sizes))
|
||||
|
||||
def test_SM80_Device_Conv2d_Dgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_align4(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float16, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 12),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 12),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, problem_sizes))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,162 @@
|
||||
# test/unit/conv/device/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dDgradImplicitGemmF16nhwcF16nhwcF32nhwcTensorOpF32SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Dgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_unity_stride_stage3(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Dgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_unity_stride_stage4(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32], stages=4,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Dgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_unity_stride_stage3_64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Dgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_unity_stride_stage4_64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=4,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,89 @@
|
||||
# test/unit/conv/device/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass.conv2d_operation import *
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dDgradImplicitGemmF32nhwcF32nhwcF32nhwcSimtF32SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Fprop_Analytic_ImplicitGemm_f32nhwc_f32nhwc_f32nhwc_simt_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[1, 1, 1],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.Simt,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=1)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 8], stages=4,
|
||||
warp_count=[4, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Dgrad_Optimized_ImplicitGemm_f32nhwc_f32nhwc_f32nhwc_simt_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[1, 1, 1],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.Simt,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=1)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 8], stages=4,
|
||||
warp_count=[2, 4, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,86 @@
|
||||
# test/unit/conv/device/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dDgradImplicitGemmTF32nhwcTF32nhwcTF32nhwcTensorOpF32SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Dgrad_Analytic_ImplicitGemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 16], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Dgrad_Optimized_ImplicitGemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 16], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Unity,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,154 @@
|
||||
# test/unit/conv/device/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
def conv2d_few_channel_problemsizes(channels):
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 8, 8, channels),
|
||||
cutlass.Tensor4DCoord(16, 3, 3, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(2, 2),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 16, 16, channels),
|
||||
cutlass.Tensor4DCoord(16, 3, 3, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(2, 2),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 16, 16, channels),
|
||||
cutlass.Tensor4DCoord(16, 7, 7, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 224, 224, channels),
|
||||
cutlass.Tensor4DCoord(32, 7, 7, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 224, 224, channels),
|
||||
cutlass.Tensor4DCoord(64, 7, 7, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(2, 2),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 224, 224, channels),
|
||||
cutlass.Tensor4DCoord(64, 5, 5, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 224, 224, channels),
|
||||
cutlass.Tensor4DCoord(64, 5, 5, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(2, 2),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
return problem_sizes
|
||||
|
||||
class Conv2dFpropFewChannelsF16NHWCF16NHWCF16HNWCTensorOpF32SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Fprop_Few_Channels_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_channels_2(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=2)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=2)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.few_channels,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, conv2d_few_channel_problemsizes(2)))
|
||||
|
||||
def test_SM80_Device_Conv2d_Fprop_Few_Channels_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_channels_1(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=1)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=1)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32], stages=2,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.few_channels,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, conv2d_few_channel_problemsizes(1)))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,175 @@
|
||||
# test/unit/conv/device/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
def conv2d_fixed_channel_problemsizes(channels):
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 8, 8, channels),
|
||||
cutlass.Tensor4DCoord(16, 3, 3, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(2, 2),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 224, 224, channels),
|
||||
cutlass.Tensor4DCoord(32, 7, 7, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 224, 224, channels),
|
||||
cutlass.Tensor4DCoord(64, 7, 7, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(2, 2),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 224, 224, channels),
|
||||
cutlass.Tensor4DCoord(64, 5, 5, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 224, 224, channels),
|
||||
cutlass.Tensor4DCoord(64, 5, 5, channels),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(2, 2),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
return problem_sizes
|
||||
|
||||
class Conv2dFpropFixedChannelsF16NHWCF16NHWCF16HNWCTensorOpF32SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Fprop_Fixed_Channels_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_channels_8(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.fixed_channels,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, conv2d_fixed_channel_problemsizes(8)))
|
||||
|
||||
def test_SM80_Device_Conv2d_Fprop_Fixed_Channels_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_channels_4(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.fixed_channels,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, conv2d_fixed_channel_problemsizes(4)))
|
||||
|
||||
def test_SM80_Device_Conv2d_Fprop_Fixed_Channels_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_channels_2(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=2)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=2)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.fixed_channels,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, conv2d_fixed_channel_problemsizes(2)))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,291 @@
|
||||
# test/unit/conv/device/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dFpropImplicitGemmF16nhwcF16nhwcF16nhwcTensorOpF16SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Fprop_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float16, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Fprop_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float16, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Fprop_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_align2(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=2)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=2)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float16, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 12),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 12),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 14),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 14),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 23, 56, 98),
|
||||
cutlass.Tensor4DCoord(128, 3, 3, 98),
|
||||
cutlass.Tensor4DCoord(4, 0, 5, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, problem_sizes))
|
||||
|
||||
def test_SM80_Device_Conv2d_Fprop_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_align2(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=2)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=2)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float16, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 12),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 12),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 14),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 14),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 23, 56, 98),
|
||||
cutlass.Tensor4DCoord(128, 3, 3, 98),
|
||||
cutlass.Tensor4DCoord(4, 0, 5, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, problem_sizes))
|
||||
|
||||
def test_SM80_Device_Conv2d_Fprop_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_align4(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float16, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 12),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 12),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 28),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 28),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 23, 56, 100),
|
||||
cutlass.Tensor4DCoord(128, 3, 3, 100),
|
||||
cutlass.Tensor4DCoord(4, 0, 5, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, problem_sizes))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,48 @@
|
||||
# test/unit/conv/device/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dFpropImplicitGemmF16nhwcF16nhwcF32nhwcTensorOpF32SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Fprop_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,87 @@
|
||||
# test/unit/conv/device/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass.conv2d_operation import *
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dFpropImplicitGemmF32nhwcF32nhwcF32nhwcSimtF32SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Fprop_Analytic_ImplicitGemm_f32nhwc_f32nhwc_f32nhwc_simt_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[1, 1, 1],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.Simt,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=1)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 8], stages=4,
|
||||
warp_count=[4, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle2
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Fprop_Optimized_ImplicitGemm_f32nhwc_f32nhwc_f32nhwc_simt_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[1, 1, 1],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.Simt,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=1)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 8], stages=4,
|
||||
warp_count=[2, 4, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,98 @@
|
||||
# test/unit/conv/device/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dFpropImplicitGemmTF32nhwcTF32nhwcTF32nhwcTensorOpF32SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Fprop_Analytic_ImplicitGemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 16], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Fprop_Optimized_ImplicitGemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_align2(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=2)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=2)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 16], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.fprop, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 12),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 12),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
)
|
||||
]
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, problem_sizes))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,235 @@
|
||||
# test/unit/conv/device/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dStridedDgradImplicitGemmF16NHWCF16NHWCF32NHWCTensorOpF32SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Strided_Dgrad_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_128x128_32x3_64x64x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.StridedDgradIdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Strided_Dgrad_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_128x256_64x3_64x64x64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 256, 64], stages=3,
|
||||
warp_count=[2, 4, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.StridedDgradIdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Strided_Dgrad_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_align4_128x128_32x3_64x64x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.StridedDgradIdentitySwizzle1
|
||||
)
|
||||
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 12),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 12),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, problem_sizes))
|
||||
|
||||
def test_SM80_Device_Conv2d_Strided_Dgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_128x128_32x3_64x64x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.StridedDgradIdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Strided_Dgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_128x128_32x3_64x64x32_align4(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.dgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.StridedDgradIdentitySwizzle1
|
||||
)
|
||||
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 56, 56, 12),
|
||||
cutlass.Tensor4DCoord(8, 1, 1, 12),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(2, 2),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 55, 55, 12),
|
||||
cutlass.Tensor4DCoord(8, 1, 1, 12),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(2, 2),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, problem_sizes))
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,86 @@
|
||||
# test/unit/conv/device/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dWgradImplicitGemmF16nhwcF16nhwcF16nhwcTensorOpF16SM80(unittest.TestCase):
|
||||
def test_Device_Conv2d_Wgrad_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.wgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float16, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_Device_Conv2d_Wgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.wgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float16, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,224 @@
|
||||
# test/unit/conv/device/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dWgradImplicitGemmF16nhwcF16nhwcF32nhwcTensorOpF32SM80(unittest.TestCase):
|
||||
def test_Device_Conv2d_Wgrad_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 16], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.wgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Wgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 16], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.wgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Wgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_64x256_32x4_64x64x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[64, 256, 32], stages=3,
|
||||
warp_count=[1, 4, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.wgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_Device_Conv2d_Wgrad_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_align4(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 16], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.wgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 12),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 12),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, problem_sizes))
|
||||
|
||||
def test_Device_Conv2d_Wgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_align4(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 16], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.wgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 4, 4, 12),
|
||||
cutlass.Tensor4DCoord(8, 3, 3, 12),
|
||||
cutlass.Tensor4DCoord(0, 0, 0, 0),
|
||||
cutlass.MatrixCoord(3, 3),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, problem_sizes))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,87 @@
|
||||
# test/unit/conv/device/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass.conv2d_operation import *
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dWgradImplicitGemmF32nhwcF32nhwcF32nhwcSimtF32SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Wgrad_Analytic_ImplicitGemm_f32nhwc_f32nhwc_f32nhwc_simt_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[1, 1, 1],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.Simt,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=1)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 8], stages=4,
|
||||
warp_count=[2, 4, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.wgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.analytic,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Wgrad_Optimized_ImplicitGemm_f32nhwc_f32nhwc_f32nhwc_simt_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[1, 1, 1],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.Simt,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=1)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 8], stages=4,
|
||||
warp_count=[2, 4, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.wgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
@ -0,0 +1,98 @@
|
||||
# test/unit/conv/device/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
class Conv2dWgradImplicitGemmTF32nhwcTF32nhwcTF32nhwcTensorOpF32SM80(unittest.TestCase):
|
||||
def test_SM80_Device_Conv2d_Wgrad_Optimized_ImplicitGemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=8)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 16], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.wgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation))
|
||||
|
||||
def test_SM80_Device_Conv2d_Wgrad_Optimized_ImplicitGemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_align1(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=math_inst.element_a,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=1)
|
||||
B = TensorDescription(
|
||||
element=math_inst.element_b,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=1)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32,
|
||||
layout=cutlass.TensorNHWC,
|
||||
alignment=4)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32], stages=3,
|
||||
warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst,
|
||||
min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
operation = Conv2dOperation(
|
||||
conv_kind=cutlass.conv.Operator.wgrad, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
|
||||
arch=80, tile_description=tile_description, A=A, B=B, C=C,
|
||||
element_epilogue=cutlass.float32, stride_support=StrideSupport.Strided,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
problem_sizes = [
|
||||
cutlass.conv.Conv2dProblemSize(
|
||||
cutlass.Tensor4DCoord(1, 8, 8, 1),
|
||||
cutlass.Tensor4DCoord(1, 3, 3, 1),
|
||||
cutlass.Tensor4DCoord(1, 1, 1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.MatrixCoord(1, 1),
|
||||
cutlass.conv.Mode.cross_correlation,
|
||||
1, 1
|
||||
),
|
||||
]
|
||||
|
||||
self.assertTrue(test_all_conv2d(operation, problem_sizes))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
tools/library/scripts/pycutlass/test/conv/run_all_tests.py (new file, 10 lines)
@ -0,0 +1,10 @@
import pycutlass
import unittest
from pycutlass.memory_manager import *

if __name__ == '__main__':
    pycutlass.get_memory_pool(2**32, 2**32)
    loader = unittest.TestLoader()
    tests = loader.discover('./', 'conv2d_*.py')
    testRunner = unittest.runner.TextTestRunner()
    testRunner.run(tests)
@ -0,0 +1 @@
CUPY_CACHE_DIR=./ python test_frontend.py
tools/library/scripts/pycutlass/test/frontend/test_frontend.py (new file, 136 lines)
@ -0,0 +1,136 @@
|
||||
#################################################################################################
|
||||
#
|
||||
# Copyright (c) 2017 - 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#################################################################################################
|
||||
## Test cases for the PyTorch and CuPy frontends
|
||||
import pycutlass
|
||||
import unittest
|
||||
from pycutlass import *
|
||||
import torch
|
||||
import cupy as cp
|
||||
|
||||
|
||||
class Test_Frontend(unittest.TestCase):
|
||||
def setUp(self) -> None:
|
||||
#
|
||||
# define the cutlass operator
|
||||
#
|
||||
math_inst = MathInstruction(
|
||||
[1, 1, 1], cutlass.float32, cutlass.float32, cutlass.float32,
|
||||
cutlass.OpClass.Simt, MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
[128, 128, 8], 4, [2, 4, 1],
|
||||
math_inst, 80, 80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
cutlass.float32, cutlass.RowMajor, 1
|
||||
)
|
||||
|
||||
B = TensorDescription(
|
||||
cutlass.float32, cutlass.RowMajor, 1
|
||||
)
|
||||
|
||||
C = TensorDescription(
|
||||
cutlass.float32, cutlass.RowMajor, 1
|
||||
)
|
||||
|
||||
self.operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=cutlass.float32,
|
||||
epilogue_functor=EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1
|
||||
)
|
||||
|
||||
pycutlass.compiler.add_module([self.operation,])
|
||||
|
||||
|
||||
def test_torch_frontend(self):
|
||||
problem_size = cutlass.gemm.GemmCoord(512, 256, 128)
|
||||
|
||||
tensor_A = torch.ceil(torch.empty(size=(problem_size.m(), problem_size.k()), dtype=torch.float32, device="cuda").uniform_(-8.5, 7.5))
|
||||
tensor_B = torch.ceil(torch.empty(size=(problem_size.k(), problem_size.n()), dtype=torch.float32, device="cuda").uniform_(-8.5, 7.5))
|
||||
tensor_C = torch.ceil(torch.empty(size=(problem_size.m(), problem_size.n()), dtype=torch.float32, device="cuda").uniform_(-8.5, 7.5))
|
||||
tensor_D = torch.empty_like(tensor_C)
|
||||
|
||||
|
||||
alpha = 1.0
|
||||
beta = 0.0
|
||||
|
||||
arguments = GemmArguments(
|
||||
operation=self.operation, problem_size=problem_size,
|
||||
A=tensor_A, B=tensor_B, C=tensor_C, D=tensor_D,
|
||||
output_op=LinearCombinationFunctorArguments(alpha, beta),
|
||||
gemm_mode=cutlass.gemm.Mode.Gemm, split_k_splices=1
|
||||
)
|
||||
|
||||
self.operation.run(arguments)
|
||||
|
||||
arguments.sync()
|
||||
|
||||
tensor_D_ref = alpha * tensor_A @ tensor_B + beta * tensor_C
|
||||
|
||||
self.assertTrue(torch.equal(tensor_D, tensor_D_ref))
|
||||
|
||||
def test_cupy_frontend(self):
|
||||
cp.cuda.set_allocator(rmm.rmm_cupy_allocator)
|
||||
|
||||
problem_size = cutlass.gemm.GemmCoord(512, 256, 128)
|
||||
|
||||
tensor_A = cp.ceil(cp.random.uniform(low=-8.5, high=7.5, size=(problem_size.m(), problem_size.k()), dtype=cp.float32))
|
||||
tensor_B = cp.ceil(cp.random.uniform(low=-8.5, high=7.5, size=(problem_size.k(), problem_size.n()), dtype=cp.float32))
|
||||
tensor_C = cp.ceil(cp.random.uniform(low=-8.5, high=7.5, size=(problem_size.m(), problem_size.n()), dtype=cp.float32))
|
||||
tensor_D = cp.ones_like(tensor_C)
|
||||
|
||||
alpha = 1.0
|
||||
beta = 1.0
|
||||
|
||||
tensor_D_ref = alpha * tensor_A @ tensor_B + beta * tensor_C
|
||||
|
||||
arguments = GemmArguments(
|
||||
operation=self.operation, problem_size=problem_size,
|
||||
A=tensor_A, B=tensor_B, C=tensor_C, D=tensor_D,
|
||||
output_op=LinearCombinationFunctorArguments(alpha, beta),
|
||||
gemm_mode=cutlass.gemm.Mode.Gemm, split_k_splices=1
|
||||
)
|
||||
|
||||
self.operation.run(arguments)
|
||||
|
||||
arguments.sync()
|
||||
|
||||
self.assertTrue(cp.array_equal(tensor_D, tensor_D_ref))
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**32, 2**32)
|
||||
unittest.main()
|
||||
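The torch and cupy cases above exercise two device-array frontends against the same compiled GemmOperationUniversal. As an editorial illustration only (not part of the original diff), a host-numpy variant might look like the sketch below. It assumes that GemmArguments also accepts flattened host numpy arrays (as the pycutlass GEMM examples do) and that arguments.sync() copies D back to the host; the function name run_numpy_gemm and the flat-array shapes are assumptions for illustration, and the pycutlass names come from the `from pycutlass import *` in test_frontend.py above.

# Editorial sketch, not part of the original change. Assumes 'operation' is the
# f32 SIMT GemmOperationUniversal built in Test_Frontend.setUp() and already
# compiled via pycutlass.compiler.add_module().
import numpy as np

def run_numpy_gemm(operation):
    problem_size = cutlass.gemm.GemmCoord(512, 256, 128)

    # flattened host arrays, mirroring the value ranges used by the torch/cupy tests
    tensor_A = np.ceil(np.random.uniform(low=-8.5, high=7.5, size=(problem_size.m() * problem_size.k(),))).astype(np.float32)
    tensor_B = np.ceil(np.random.uniform(low=-8.5, high=7.5, size=(problem_size.k() * problem_size.n(),))).astype(np.float32)
    tensor_C = np.ceil(np.random.uniform(low=-8.5, high=7.5, size=(problem_size.m() * problem_size.n(),))).astype(np.float32)
    tensor_D = np.zeros_like(tensor_C)

    alpha = 1.0
    beta = 0.0

    # same argument structure as the torch/cupy tests above
    arguments = GemmArguments(
        operation=operation, problem_size=problem_size,
        A=tensor_A, B=tensor_B, C=tensor_C, D=tensor_D,
        output_op=LinearCombinationFunctorArguments(alpha, beta),
        gemm_mode=cutlass.gemm.Mode.Gemm, split_k_splices=1
    )

    operation.run(arguments)
    arguments.sync()

    # host-side reference; A, B, C are row-major in this testbed
    A = tensor_A.reshape(problem_size.m(), problem_size.k())
    B = tensor_B.reshape(problem_size.k(), problem_size.n())
    C = tensor_C.reshape(problem_size.m(), problem_size.n())
    tensor_D_ref = alpha * A @ B + beta * C

    return np.array_equal(tensor_D.reshape(problem_size.m(), problem_size.n()), tensor_D_ref)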
tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py (new file, 93 lines)
@ -0,0 +1,93 @@
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
from pycutlass.test.gemm_testbed import test_all_gemm
|
||||
|
||||
class GemmBF16TensorOpSm80(unittest.TestCase):
|
||||
def SM80_Device_Gemm_bf16n_bf16n_f32t_tensor_op_f32_64x128x64_32x64x64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.bfloat16, element_b=cutlass.bfloat16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[64, 128, 64],
|
||||
stages=4, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.bfloat16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.bfloat16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.RowMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_SM80_Device_Gemm_bf16t_bf16t_bf16t_tensor_op_f32_128x256x64_64x64x64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.bfloat16, element_b=cutlass.bfloat16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[64, 128, 32],
|
||||
stages=6, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.bfloat16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.bfloat16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.bfloat16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "multistage"))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**24, 2**24)
|
||||
unittest.main()
|
||||
tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py (new file, 425 lines)
@ -0,0 +1,425 @@
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
from pycutlass.test.gemm_testbed import test_all_gemm
|
||||
|
||||
|
||||
class GemmF16Sm80(unittest.TestCase):
|
||||
def test_SM80_Device_Gemm_f32t_f32n_f32t_tensor_op_bf16_f32_128x128x32_64x64x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32],
|
||||
stages=3, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.ColumnMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.BatchedIdentitySwizzle
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor,
|
||||
direct_store=True
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_SM80_Device_Gemm_f16n_f16n_f16t_tensor_op_f32_128x128x64_64x64x64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 64],
|
||||
stages=3, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_SM80_Device_Gemm_f16n_f16n_f32n_tensor_op_f32_128x256x64_64x64x64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 256, 64],
|
||||
stages=3, warp_count=[2, 4, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.ColumnMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_SM80_Device_Gemm_f16n_f16n_f32t_tensor_op_f32_256x128x64_64x64x64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[256, 128, 64],
|
||||
stages=3, warp_count=[4, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.RowMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_SM80_Device_Gemm_f16n_f16t_f16t_tensor_op_f16_sliced_k_128x64x64_64x64x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 64, 64],
|
||||
stages=3, warp_count=[2, 1, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float16
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_SM80_Device_GemmUniversal_f16n_f16t_f32t_tensor_op_f32_64x64x32_32x32x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float16, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[64, 64, 32],
|
||||
stages=10, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float16
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_SM80_Device_Gemm_f16n_f16t_f32t_tensor_op_f32_256x128x64_64x64x64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[256, 128, 64],
|
||||
stages=3, warp_count=[4, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_test_SM80_Device_Gemm_f16t_f16n_f16t_tensor_op_f16_sliced_k_128x64x64_64x64x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 64, 64],
|
||||
stages=3, warp_count=[2, 1, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_SM80_Device_Gemm_f16t_f16t_f32n_tensor_op_f32_128x256x64_64x64x64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 256, 64],
|
||||
stages=3, warp_count=[2, 4, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.RowMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_SM80_Device_Gemm_f16t_f16t_f32t_tensor_op_f32_128x256x64_64x64x64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16],
|
||||
element_a=cutlass.float16, element_b=cutlass.float16,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 256, 64],
|
||||
stages=3, warp_count=[2, 4, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.ColumnMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**24, 2**24)
|
||||
unittest.main()
|
||||
tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py (new file, 138 lines)
@ -0,0 +1,138 @@
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.memory_manager import get_allocated_size
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
from pycutlass.test.gemm_testbed import test_all_gemm
|
||||
|
||||
|
||||
class GemmF32nF32nF32nTensorOpF32Sm80(unittest.TestCase):
|
||||
def test_SM80_Device_Gemm_f32t_f32n_f32t_tensor_op_bf16_f32_128x128x32_64x64x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add_fast_bf16
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32],
|
||||
stages=3, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.RowMajor,
|
||||
alignment=4
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.ColumnMajor,
|
||||
alignment=4
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.RowMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
|
||||
def test_SM80_Device_Gemm_f32n_f32n_f32t_tensor_op_f32_128x128x32_64x64x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32],
|
||||
stages=3, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.ColumnMajor,
|
||||
alignment=4
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.ColumnMajor,
|
||||
alignment=4
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.RowMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_SM80_Device_Gemm_f32n_f32n_f32t_tensor_op_fast_accurate_f32_64x64x32_32x32x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 8],
|
||||
element_a=cutlass.float32, element_b=cutlass.float32,
|
||||
element_accumulator=cutlass.float32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add_fast_f32
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[64, 64, 32],
|
||||
stages=3, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.ColumnMajor,
|
||||
alignment=4
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.ColumnMajor,
|
||||
alignment=4
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.RowMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**24, 2**24)
|
||||
pycutlass.compiler.load_from_cache()
|
||||
unittest.main()
|
||||
tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py (new file, 95 lines)
@ -0,0 +1,95 @@
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
from pycutlass.test.gemm_testbed import test_all_gemm
|
||||
|
||||
class GemmF64TensorOpSm80(unittest.TestCase):
|
||||
def test_SM80_Device_Gemm_f64n_f64t_f64t_tensor_op_f64_32x32x16_16x16x16(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[8, 8, 4],
|
||||
element_a=cutlass.float64, element_b=cutlass.float64,
|
||||
element_accumulator=cutlass.float64, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[32, 32, 16],
|
||||
stages=4, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
# alignment 1 restricted for double
|
||||
A = TensorDescription(
|
||||
element=cutlass.float64, layout=cutlass.ColumnMajor,
|
||||
alignment=1
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float64, layout=cutlass.RowMajor,
|
||||
alignment=1
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float64, layout=cutlass.RowMajor,
|
||||
alignment=1
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float64
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
def test_SM80_Device_Gemm_f64t_f64n_f64t_tensor_op_f64_64x64x16_32x32x16(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[8, 8, 4],
|
||||
element_a=cutlass.float64, element_b=cutlass.float64,
|
||||
element_accumulator=cutlass.float64, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[64, 64, 16],
|
||||
stages=4, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
# alignment 1 restricted for double
|
||||
A = TensorDescription(
|
||||
element=cutlass.float64, layout=cutlass.RowMajor,
|
||||
alignment=1
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.float64, layout=cutlass.ColumnMajor,
|
||||
alignment=1
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.float64, layout=cutlass.RowMajor,
|
||||
alignment=1
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float64
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "universal"))
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**24, 2**24)
|
||||
unittest.main()
|
||||
tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py (new file, 197 lines)
@ -0,0 +1,197 @@
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
from pycutlass.test.gemm_grouped_testbed import TestbedGrouped
|
||||
|
||||
|
||||
class GemmGroupedSm80(unittest.TestCase):
|
||||
def test_SM80_Device_GemmGrouped_f16n_f16t_f32n_tensor_op_f32_128x128x32_64x64x32(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16], element_a=cutlass.float16,
|
||||
element_b=cutlass.float16, element_accumulator=cutlass.float32,
|
||||
opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32],
|
||||
stages=3, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.ColumnMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
swizzling_functor = cutlass.BatchedIdentitySwizzle
|
||||
|
||||
for precompute_mode in [SchedulerMode.Device, SchedulerMode.Host]:
|
||||
operation = GemmOperationGrouped(
|
||||
tile_description.minimum_compute_capability,
|
||||
tile_description, A, B, C,
|
||||
element_epilogue,
|
||||
epilogue_functor, swizzling_functor,
|
||||
precompute_mode=precompute_mode
|
||||
)
|
||||
|
||||
testbed = TestbedGrouped(operation=operation)
|
||||
|
||||
self.assertTrue(testbed.run(24))
|
||||
|
||||
def test_SM80_Device_GemmGrouped_f64t_f64t_f64n_tensor_op_f64_64x64x16_32x32x16(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[8, 8, 4], element_a=cutlass.float64,
|
||||
element_b=cutlass.float64, element_accumulator=cutlass.float64,
|
||||
opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[64, 64, 16],
|
||||
stages=4, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float64, layout=cutlass.RowMajor,
|
||||
alignment=1
|
||||
)
|
||||
|
||||
B = TensorDescription(
|
||||
element=cutlass.float64, layout=cutlass.RowMajor,
|
||||
alignment=1
|
||||
)
|
||||
|
||||
C = TensorDescription(
|
||||
element=cutlass.float64, layout=cutlass.ColumnMajor,
|
||||
alignment=1
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float64
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
swizzling_functor = cutlass.BatchedIdentitySwizzle
|
||||
|
||||
for precompute_mode in [SchedulerMode.Device, SchedulerMode.Host]:
|
||||
operation = GemmOperationGrouped(
|
||||
tile_description.minimum_compute_capability,
|
||||
tile_description, A, B, C,
|
||||
element_epilogue,
|
||||
epilogue_functor, swizzling_functor,
|
||||
precompute_mode=precompute_mode
|
||||
)
|
||||
|
||||
testbed = TestbedGrouped(operation=operation)
|
||||
|
||||
self.assertTrue(testbed.run(24))
|
||||
|
||||
def test_SM80_Device_GemmGrouped_f32t_f32t_f32t_simt_f32_128x64x8_64x32x1(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[1, 1, 1], element_a=cutlass.float32,
|
||||
element_b=cutlass.float32, element_accumulator=cutlass.float32,
|
||||
opcode_class=cutlass.OpClass.Simt,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 64, 8],
|
||||
stages=4, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.RowMajor,
|
||||
alignment=1
|
||||
)
|
||||
|
||||
B = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.RowMajor,
|
||||
alignment=1
|
||||
)
|
||||
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.RowMajor,
|
||||
alignment=1
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
swizzling_functor = cutlass.BatchedIdentitySwizzle
|
||||
|
||||
for precompute_mode in [SchedulerMode.Device, SchedulerMode.Host]:
|
||||
operation = GemmOperationGrouped(
|
||||
tile_description.minimum_compute_capability,
|
||||
tile_description, A, B, C,
|
||||
element_epilogue,
|
||||
epilogue_functor, swizzling_functor,
|
||||
precompute_mode=precompute_mode
|
||||
)
|
||||
|
||||
testbed = TestbedGrouped(operation=operation)
|
||||
|
||||
self.assertTrue(testbed.run(27))
|
||||
|
||||
def test_SM80_Device_GemmGrouped_f16n_f16t_f32n_tensor_op_f32_128x128x32_64x64x32_cache(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 16], element_a=cutlass.float16,
|
||||
element_b=cutlass.float16, element_accumulator=cutlass.float32,
|
||||
opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 32],
|
||||
stages=3, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
|
||||
B = TensorDescription(
|
||||
element=cutlass.float16, layout=cutlass.ColumnMajor,
|
||||
alignment=8
|
||||
)
|
||||
|
||||
C = TensorDescription(
|
||||
element=cutlass.float32, layout=cutlass.ColumnMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination
|
||||
swizzling_functor = cutlass.BatchedIdentitySwizzle
|
||||
|
||||
for precompute_mode in [SchedulerMode.Device, SchedulerMode.Host]:
|
||||
operation = GemmOperationGrouped(
|
||||
tile_description.minimum_compute_capability,
|
||||
tile_description, A, B, C,
|
||||
element_epilogue,
|
||||
epilogue_functor, swizzling_functor,
|
||||
precompute_mode=precompute_mode
|
||||
)
|
||||
|
||||
testbed = TestbedGrouped(operation=operation)
|
||||
|
||||
self.assertTrue(testbed.run(5))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**26, 2**26)
|
||||
unittest.main()
|
||||
tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py (new file, 219 lines)
@ -0,0 +1,219 @@
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
from pycutlass.test.gemm_testbed import test_all_gemm
|
||||
|
||||
class GemmS8TensorOpF32Sm80(unittest.TestCase):
|
||||
def test_SM80_Device_Gemm_s8t_s8n_s8t_tensor_op_s32_64x64x64_32x32x64(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 32],
|
||||
element_a=cutlass.int8, element_b=cutlass.int8,
|
||||
element_accumulator=cutlass.int32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add_saturate
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[64, 64, 64],
|
||||
stages=6, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.ColumnMajorInterleaved32,
|
||||
alignment=16
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.RowMajorInterleaved32,
|
||||
alignment=16
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.ColumnMajorInterleaved32,
|
||||
alignment=8
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.FastLinearCombinationClamp
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "interleaved"))
|
||||
|
||||
def test_SM80_Device_Gemm_s8t_s8n_s8t_tensor_op_s32_256x128x128_64x64x128(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 32],
|
||||
element_a=cutlass.int8, element_b=cutlass.int8,
|
||||
element_accumulator=cutlass.int32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 128],
|
||||
stages=3, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.RowMajor,
|
||||
alignment=16
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.ColumnMajor,
|
||||
alignment=16
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.RowMajor,
|
||||
alignment=16
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.FastLinearCombinationClamp
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "multistage"))
|
||||
|
||||
def test_SM80_Device_Gemm_s8t_s8n_s8n_tensor_op_s32_128x128x128_64x64x128(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 32],
|
||||
element_a=cutlass.int8, element_b=cutlass.int8,
|
||||
element_accumulator=cutlass.int32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 128],
|
||||
stages=3, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.RowMajor,
|
||||
alignment=16
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.ColumnMajor,
|
||||
alignment=16
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.ColumnMajor,
|
||||
alignment=16
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.float32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.FastLinearCombinationClamp
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "multistage"))
|
||||
|
||||
def test_SM80_Device_Gemm_s8t_s8n_s32n_tensor_op_s32_128x128x128_64x64x128(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 32],
|
||||
element_a=cutlass.int8, element_b=cutlass.int8,
|
||||
element_accumulator=cutlass.int32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 128],
|
||||
stages=3, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.RowMajor,
|
||||
alignment=16
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.ColumnMajor,
|
||||
alignment=16
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.int32, layout=cutlass.ColumnMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.int32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombinationClamp
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "multistage"))
|
||||
|
||||
def test_SM80_Device_Gemm_s8t_s8n_s32t_tensor_op_s32_128x128x128_64x64x128(self):
|
||||
math_inst = MathInstruction(
|
||||
instruction_shape=[16, 8, 32],
|
||||
element_a=cutlass.int8, element_b=cutlass.int8,
|
||||
element_accumulator=cutlass.int32, opcode_class=cutlass.OpClass.TensorOp,
|
||||
math_operation=MathOperation.multiply_add
|
||||
)
|
||||
|
||||
tile_description = TileDescription(
|
||||
threadblock_shape=[128, 128, 128],
|
||||
stages=3, warp_count=[2, 2, 1],
|
||||
math_instruction=math_inst, min_compute=80, max_compute=80
|
||||
)
|
||||
|
||||
A = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.RowMajor,
|
||||
alignment=16
|
||||
)
|
||||
B = TensorDescription(
|
||||
element=cutlass.int8, layout=cutlass.ColumnMajor,
|
||||
alignment=16
|
||||
)
|
||||
C = TensorDescription(
|
||||
element=cutlass.int32, layout=cutlass.RowMajor,
|
||||
alignment=4
|
||||
)
|
||||
|
||||
element_epilogue = cutlass.int32
|
||||
|
||||
epilogue_functor = EpilogueFunctor.LinearCombinationClamp
|
||||
|
||||
swizzling_functor = cutlass.IdentitySwizzle1
|
||||
|
||||
operation = GemmOperationUniversal(
|
||||
arch=80, tile_description=tile_description,
|
||||
A=A, B=B, C=C, element_epilogue=element_epilogue,
|
||||
epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
|
||||
)
|
||||
|
||||
self.assertTrue(test_all_gemm(operation, "multistage"))
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pycutlass.get_memory_pool(2**24, 2**24)
|
||||
unittest.main()
|
||||
@ -0,0 +1,9 @@
import pycutlass
import unittest

if __name__ == '__main__':
    pycutlass.get_memory_pool(2**26, 2**26)
    loader = unittest.TestLoader()
    tests = loader.discover('./', 'gemm_*.py')
    testRunner = unittest.runner.TextTestRunner()
    testRunner.run(tests)
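The conv and gemm suites above each ship their own run_all_tests.py. A combined single-process runner is not part of this change; the following is a minimal sketch, assuming it is launched from tools/library/scripts/pycutlass/test/ on a machine with a CUDA device, the usual pycutlass environment set up, and enough device memory for the shared pool.

# Editorial sketch of a combined runner, modeled on the two run_all_tests.py scripts above.
# Assumes the conv/ and gemm/ subdirectories are discoverable from the launch directory.
import pycutlass
import unittest

if __name__ == '__main__':
    pycutlass.get_memory_pool(2**32, 2**32)
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    for start_dir, pattern in [('./conv', 'conv2d_*.py'), ('./gemm', 'gemm_*.py')]:
        suite.addTests(loader.discover(start_dir, pattern))
    unittest.runner.TextTestRunner().run(suite)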
@ -0,0 +1,350 @@
|
||||
conv2d fprop_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1767700736 2104699940 3506659864 557648934
|
||||
conv2d fprop_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1539314507 3971227455 1976927351 1642148785
|
||||
conv2d fprop_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 276489656 653235219 3147305346 880610205
|
||||
conv2d fprop_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 272457724 2178229139 2786201726 4170295839
|
||||
conv2d fprop_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 242235041 2149454506 784935854 682531065
|
||||
conv2d fprop_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 215903418 3478189705 1667216236 1437761176
|
||||
conv2d fprop_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 215903418 379326961 1780379994 3740415776
|
||||
conv2d fprop_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 215903418 924848818 3533854396 2683779476
|
||||
conv2d fprop_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2870331951 359232443 2147867990 1653277018
|
||||
conv2d fprop_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2870331951 3784314846 2644315999 4224154526
|
||||
conv2d fprop_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3787448414 3562991793 535073859 2563373454
|
||||
conv2d fprop_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 426169840 2464808416 864648234 461884698
|
||||
conv2d fprop_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2564934525 3910792915 3577331017 827498183
|
||||
conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 28479234 867695528 1947311971 83328334
|
||||
conv2d fprop_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4192922822 4244595864 2296602326 2349214706
|
||||
conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 274678245 3464152269 1682550229 3446204619
|
||||
conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3993280136 828543035 1319748516 956044554
|
||||
conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 832003025 3799813757 4030292245 457791957
|
||||
conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1444316594 4129865888 93616503 412257611
|
||||
conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2931873718 1841508064 1497852219 36703874
|
||||
conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2931873718 1841508064 1497852219 1842147148
|
||||
conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1612565294 109894479 1782187316 3370789453
|
||||
conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 841569299 1010785577 1158956167 3261208135
|
||||
conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1893352157 48149942 3544807462 446577726
|
||||
conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 fnhwc_fnhwc_fnhwc_f_f 3585320147 2150950452 1625817025 3964129474
|
||||
conv2d dgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4120764770 1227883689 3016005301 4142905842
|
||||
conv2d dgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4120764770 3337296764 4183699161 3654176452
|
||||
conv2d dgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1972540475 3852963969 864006170 920352568
|
||||
conv2d dgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2624318208 2750240096 2120184232 2600672872
|
||||
conv2d dgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2044596379 3224082300 2084034673 3588056946
|
||||
conv2d dgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3271403719 3033073939 304048758 1882633089
|
||||
conv2d dgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1419588777 610026473 447427404 2639856195
|
||||
conv2d dgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 763924990 2818680871 58428273 3332443900
|
||||
conv2d dgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2578426561 1891702153 103393067 2558647731
|
||||
conv2d dgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 740110603 162127134 3567670201 3173514764
|
||||
conv2d dgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 740110603 162127134 3567670201 363897018
|
||||
conv2d dgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2462511240 1350938697 1696306119 1005311005
|
||||
conv2d dgrad_1x56x56x8_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3884703009 3552725366 1975514757 1210310496
|
||||
conv2d wgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2056905385 447674669 724481645 1457430910
|
||||
conv2d wgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4120764770 1227883689 3401425854 3897766524
|
||||
conv2d wgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1972540475 3749787834 3350064812 1136116240
|
||||
conv2d wgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3414629540 820341033 770836461 2451581199
|
||||
conv2d wgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4100326666 2581696511 1088458082 1521190911
|
||||
conv2d wgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3662895757 2885454895 935600441 2615245898
|
||||
conv2d wgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2044596379 3831334389 3506139121 814982501
|
||||
conv2d wgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2154102133 737968461 1291834254 2665225480
|
||||
conv2d wgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4120764770 3573498719 1809195644 1765637461
|
||||
conv2d wgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4120764770 3573498719 3379808294 483095299
|
||||
conv2d wgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1972540475 4194153035 2863868771 1639389008
|
||||
conv2d wgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2624318208 157618421 1779474147 814087242
|
||||
conv2d wgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2044596379 2300180628 423968553 3890279569
|
||||
conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2609259399 1848932917 522753581 1926508271
|
||||
conv2d wgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2948772873 3663040534 4014266327 1288646188
|
||||
conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3271403719 1585195072 1487505772 3253374264
|
||||
conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1419588777 451194147 3578359696 3659768981
|
||||
conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 763924990 2780826684 2883769406 148530958
|
||||
conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2578426561 3849874822 102765469 1305171059
|
||||
conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 740110603 1995451256 2632815435 1516344656
|
||||
conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 740110603 1995451256 2632815435 1586331550
|
||||
conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2462511240 2274021368 1188866747 3178890497
|
||||
conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 752289976 1226457131 4187777346 1400559240
|
||||
conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3723912751 1585959358 3731079159 1498901684
|
||||
conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 fnhwc_fnhwc_fnhwc_f_f 2027599472 2758666204 3287095476 4291916486
|
||||
conv2d fprop_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 3393706648 3519979618 1149261202 799742106
|
||||
conv2d fprop_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 3409586999 409840186 1724648597 2642018980
|
||||
conv2d fprop_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1815685330 1398622058 2431638856 1016967269
|
||||
conv2d fprop_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2555706782 3271563943 1020153035 299097281
|
||||
conv2d fprop_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 4173830187 736684125 472021975 2064613035
|
||||
conv2d fprop_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 3010335403 2751224679 2250540122 3725638844
|
||||
conv2d fprop_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 3010335403 1583610315 3287895411 2394340435
|
||||
conv2d fprop_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 3010335403 2356047354 7055632 915702611
|
||||
conv2d fprop_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2748205217 2539405983 1217377670 2011175578
|
||||
conv2d fprop_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2748205217 2114448427 249997769 2711364520
|
||||
conv2d fprop_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1528321643 1532777511 3597171412 296622236
|
||||
conv2d fprop_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1326617037 3415095747 847196866 1481554158
|
||||
conv2d fprop_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1122706355 2841974626 2791878604 632900093
|
||||
conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1728385278 2462678309 3066040807 1334515660
|
||||
conv2d fprop_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2175275779 1117731224 857614711 2096711962
|
||||
conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 4140401170 3710340185 1683575469 317397427
|
||||
conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 3552249008 2918315307 2290683130 536859016
|
||||
conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2869959072 2516947012 3328285094 2393284712
|
||||
conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1349264322 1823945068 400087667 2893025864
|
||||
conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 3321662203 426084311 4233055093 4078572279
|
||||
conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 3321662203 426084311 4233055093 3044377475
|
||||
conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 803041205 2521863610 3206942690 127091020
|
||||
conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 4083508736 37801570 240515127 2234797539
|
||||
conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2207374588 535059558 2268619394 1489214085
|
||||
conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 dnhwc_dnhwc_dnhwc_d_d 3614026280 1721563676 2979825951 1104908081
|
||||
conv2d dgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 810746104 2226238626 2053372396 2462697514
|
||||
conv2d dgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 810746104 235646718 1374133172 3696289981
|
||||
conv2d dgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2536722089 184705847 3148323124 84213385
|
||||
conv2d dgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2264868815 1724845245 3498302256 4094034457
|
||||
conv2d dgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1621735632 233390337 1801952602 3532884734
|
||||
conv2d dgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 3048346885 2306163504 642074123 4083120683
|
||||
conv2d dgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2798030672 683783039 3025345160 1890891136
|
||||
conv2d dgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1731071506 1844675436 2292509333 4006304179
|
||||
conv2d dgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 132147677 604503886 143348844 3037223953
|
||||
conv2d dgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1269799445 1678940393 3405733837 1820114523
|
||||
conv2d dgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1269799445 1678940393 3405733837 467254076
|
||||
conv2d dgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1794301352 2320042028 2134048179 508141072
|
||||
conv2d dgrad_1x56x56x8_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 561590023 3382154048 4154621995 517057927
|
||||
conv2d wgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 593915463 2360210889 2685491481 2265099675
|
||||
conv2d wgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 810746104 2226238626 1155815529 558646991
|
||||
conv2d wgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2536722089 1876429398 4216128545 1754596046
|
||||
conv2d wgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 348523586 2609019785 3938405680 2601133907
|
||||
conv2d wgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1984146316 1475870285 1157657800 1143965395
|
||||
conv2d wgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2971058593 1478256319 503014742 3930504182
|
||||
conv2d wgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1621735632 1214508920 1537003531 3830217225
|
||||
conv2d wgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2031518387 2695641559 933408074 4026827730
|
||||
conv2d wgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 810746104 517276344 1158854831 3123629043
|
||||
conv2d wgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 810746104 517276344 1448394173 1864626308
|
||||
conv2d wgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2536722089 711164468 2465036841 2993377049
|
||||
conv2d wgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2264868815 3003481795 333430991 3094857755
|
||||
conv2d wgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1621735632 1126010692 3313703859 637497110
|
||||
conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1130094757 2605103293 2477101661 1276123281
|
||||
conv2d wgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 4286533436 1302900889 2613245986 2523724148
|
||||
conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 3048346885 923365529 1681226722 417509256
|
||||
conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2798030672 3441819646 1293178065 188472807
|
||||
conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1731071506 1117530547 2706270359 502156742
|
||||
conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 132147677 2029225588 3851064913 3164530726
|
||||
conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1269799445 2337137106 3312954197 2466682688
|
||||
conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1269799445 2337137106 3312954197 2684544683
|
||||
conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 1794301352 72938921 2354994612 1463501392
|
||||
conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 252570564 2903451081 3619280116 1448586411
|
||||
conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 dnhwc_dnhwc_dnhwc_d_d 2037991187 1665743881 241585763 103256264
|
||||
conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 dnhwc_dnhwc_dnhwc_d_d 2653975581 3337638999 1440125233 2448165745
|
||||
conv2d fprop_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 991402150 1393431534 1148212814 1350914659
|
||||
conv2d fprop_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4208297221 4283492776 419570292 1210341563
|
||||
conv2d fprop_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4178596783 3828059710 2735749436 2671012171
|
||||
conv2d fprop_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 924522595 563724475 3750778972 4152580670
|
||||
conv2d fprop_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1021044158 1686067905 3765040166 4102272733
|
||||
conv2d fprop_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3335547 2674994719 635224486 2759329777
|
||||
conv2d fprop_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3335547 4201252830 2920298728 304256151
|
||||
conv2d fprop_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3335547 70289262 646435722 4137562540
|
||||
conv2d fprop_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1317457392 1288095320 2132879813 656196754
|
||||
conv2d fprop_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1317457392 2202157489 2326567490 2475188414
|
||||
conv2d fprop_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2476454437 1857118302 4164386062 239840568
|
||||
conv2d fprop_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2767650699 3514840131 590439733 3879821123
|
||||
conv2d fprop_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3896287283 3112762669 2515107934 2106635937
|
||||
conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1903067870 1021832870 3003938078 2751931686
|
||||
conv2d fprop_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3489785028 2466126497 1374078692 2737628040
|
||||
conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2051350923 263676708 3639860119 1370886256
|
||||
conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 719099834 1474713672 204857540 2768940347
|
||||
conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3441724486 3162593831 421721594 3097845598
|
||||
conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2034354027 1249407570 2567025479 1441082595
|
||||
conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 941893937 3608468045 635631428 2369653089
|
||||
conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 941893937 3608468045 635631428 1218705038
|
||||
conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 172579142 319546523 718795680 1453661415
|
||||
conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2823351660 1326352711 1110204809 1155441703
|
||||
conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3238446487 2572503545 686287700 1559476701
|
||||
conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_f_f 2149247508 1775375365 3317647029 2497607448
|
||||
conv2d dgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4110991321 3464637181 1623218578 436154205
|
||||
conv2d dgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4110991321 1479940693 3253144559 3883419107
|
||||
conv2d dgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 832653836 1871463331 2425320272 74566211
|
||||
conv2d dgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3484040069 664160900 3610888033 22347127
|
||||
conv2d dgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1513864544 1924855848 1382111427 2541177413
|
||||
conv2d dgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 868180534 1764715518 3070473696 2392864704
|
||||
conv2d dgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3437976747 666906244 3401957738 2050602745
|
||||
conv2d dgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4195072693 1575210381 781892324 2848949054
|
||||
conv2d dgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3457330201 2316839359 1539389419 4293781748
|
||||
conv2d dgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 754609939 2469024119 2885305868 2693098375
|
||||
conv2d dgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 754609939 2469024119 2885305868 1969608051
|
||||
conv2d dgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1690216859 554790212 2885143346 780489333
|
||||
conv2d dgrad_1x56x56x8_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3184127693 835105643 3337423971 3866137775
|
||||
conv2d wgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 927718585 4106152802 720400339 3989318043
|
||||
conv2d wgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4110991321 3464637181 4051957661 126285749
|
||||
conv2d wgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 832653836 3723472741 2044236350 2463899842
|
||||
conv2d wgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2075083065 2042513140 3691286135 322550345
|
||||
conv2d wgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4005590448 1116254439 2328237343 1918824440
|
||||
conv2d wgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 181075276 1743485155 3526891198 1979405632
|
||||
conv2d wgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1513864544 386662952 4057300775 1456746562
|
||||
conv2d wgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 856324887 3954249564 2340393915 4127188930
|
||||
conv2d wgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4110991321 1300426008 2921497047 4145791960
|
||||
conv2d wgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4110991321 1300426008 4080981223 3076991942
|
||||
conv2d wgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 832653836 447261065 3823545045 392205236
|
||||
conv2d wgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3484040069 2966693627 3900095420 919511892
|
||||
conv2d wgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1513864544 1759979610 4272621682 1029257940
|
||||
conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1906605830 2980501720 978889789 3136018973
|
||||
conv2d wgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 805717279 3502822733 1810065278 1387739380
|
||||
conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 868180534 3289288595 209477462 4142168174
|
||||
conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3437976747 3391080565 97275649 4063718293
|
||||
conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4195072693 1669352457 2182133559 2494741804
|
||||
conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3457330201 1126870455 319272291 3811977088
|
||||
conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 754609939 1723074453 1660326213 3902884425
|
||||
conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 754609939 1723074453 1660326213 423159249
|
||||
conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1690216859 2413490039 223529410 3303697952
|
||||
conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3168796339 1601750164 1428743330 403295189
|
||||
conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 261954979 1300976652 2749562370 3058142403
|
||||
conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_f_f 3747142491 1747587481 3143977827 835130482
|
||||
conv2d fprop_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1736512560 49406874 846358010 3314905564
|
||||
conv2d fprop_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1848484956 1432417472 1903569827 3750799351
|
||||
conv2d fprop_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4236427320 3696009469 69852620 201921851
|
||||
conv2d fprop_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 109006944 450017448 1793784844 903209915
|
||||
conv2d fprop_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 813367872 2397796503 1928191746 3210229460
|
||||
conv2d fprop_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1348284291 1307184141 46021356 1674017987
|
||||
conv2d fprop_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1348284291 1212511562 3331767121 2446286369
|
||||
conv2d fprop_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1348284291 2013675943 1681111033 1469213228
|
||||
conv2d fprop_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1703349794 500298386 3218034344 4159283207
|
||||
conv2d fprop_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1703349794 1123534155 145385311 4273847179
|
||||
conv2d fprop_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3862659311 349459322 1503631520 1404971956
|
||||
conv2d fprop_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1623686755 961217371 552550209 3980749384
|
||||
conv2d fprop_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3554927580 1131648083 4149599295 3119557776
|
||||
conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1767639287 3350675774 128324027 1059816532
|
||||
conv2d fprop_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3986143536 17411088 40173029 1694092310
|
||||
conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1157793540 3513299281 48848814 1435528367
|
||||
conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 988962069 4292634763 388976034 2674929544
|
||||
conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4202383208 3529769234 1046186503 3368902675
|
||||
conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 856448884 3057259762 2063087558 1995545427
|
||||
conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2281940872 144496548 2455451862 400986166
|
||||
conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2281940872 144496548 2455451862 1082696406
|
||||
conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2702905851 1992889713 731289041 608504198
|
||||
conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2742293143 4197915274 606840 3671124731
|
||||
conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 149434841 2288560511 2994968424 2881838300
|
||||
conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_h_h 2226824643 327135318 3718671210 2121176659
|
||||
conv2d dgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672413387 1027662440 4172720592 446082987
|
||||
conv2d dgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672413387 1101653138 3727072529 875733988
|
||||
conv2d dgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2754803027 3906526127 655926291 939844058
|
||||
conv2d dgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2784049299 2031878085 1709408312 1277173429
|
||||
conv2d dgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2756413475 22652410 1700696921 2175632852
|
||||
conv2d dgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1530672622 436588210 470857851 284463232
|
||||
conv2d dgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1500864134 59350507 969037229 1510558485
|
||||
conv2d dgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3344871528 856797938 2030818524 4231831552
|
||||
conv2d dgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 966721255 2885833872 2829967135 3441569557
|
||||
conv2d dgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2643693957 4148824382 2827420298 378131261
|
||||
conv2d dgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2643693957 4148824382 2827420298 2955292920
|
||||
conv2d dgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4028893260 1474248671 1302526250 4182204885
|
||||
conv2d dgrad_1x56x56x8_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1569788048 162506176 819639712 763595635
|
||||
conv2d wgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 289918791 1266976707 942688231 3457364823
|
||||
conv2d wgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672413387 1027662440 2005082293 2235558527
|
||||
conv2d wgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2754803027 3380032042 1370040310 1348846927
|
||||
conv2d wgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 671982235 1423304149 2107662762 1234913781
|
||||
conv2d wgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 798317794 1709026638 2421185623 3308071321
|
||||
conv2d wgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1721270411 2519327328 2541413264 3185574975
|
||||
conv2d wgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2756413475 2070174510 1364436192 3531942595
|
||||
conv2d wgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2128738105 2056902987 3079166829 2329433528
|
||||
conv2d wgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672413387 3857917762 3227877956 645422556
|
||||
conv2d wgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672413387 3857917762 3817218800 985231315
|
||||
conv2d wgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2754803027 1398036015 3630062764 2492522537
|
||||
conv2d wgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2784049299 643733019 3649549642 2637869234
|
||||
conv2d wgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2756413475 2332160299 302086821 3303132343
|
||||
conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1931093565 2458714707 2919710256 2311575036
|
||||
conv2d wgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2472246681 2260022344 500095455 2760458995
|
||||
conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1530672622 3635363851 2402907878 4131497953
|
||||
conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1500864134 2536338700 2459524764 2504484273
|
||||
conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3344871528 2667385029 2714805835 3487838445
|
||||
conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 966721255 1547169349 3198573835 302049294
|
||||
conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2643693957 2440004820 1576818970 1317923157
|
||||
conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2643693957 2440004820 1576818970 3186679687
|
||||
conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4028893260 4220759192 2236533218 3731336532
|
||||
conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2956871200 1591352238 1756650151 1262787222
|
||||
conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 365467186 892422645 1334708242 1372556938
|
||||
conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_h_h 3347784734 150035460 2897171548 3701081496
|
||||
conv2d fprop_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 945660191 3750377696 2496492611 3515056508
|
||||
conv2d fprop_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2806300501 2591577756 3148637036 3845512743
|
||||
conv2d fprop_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2322444122 3525997046 281106520 3456307300
|
||||
conv2d fprop_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 327345109 1137297282 1938163814 2551101563
|
||||
conv2d fprop_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 797067973 481331945 350851834 2477733239
|
||||
conv2d fprop_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1316460560 2044204046 1034822169 3340281844
|
||||
conv2d fprop_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1316460560 4174274001 1597212204 1881272946
|
||||
conv2d fprop_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1316460560 1535088984 3001492060 2308505016
|
||||
conv2d fprop_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 3190527989 3733991924 4211138051 3710311115
|
||||
conv2d fprop_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 3190527989 3430768821 1043108884 4185640072
|
||||
conv2d fprop_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 943531303 1948306075 3877008798 2803592376
|
||||
conv2d fprop_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 3262141476 4125717435 2946529611 2221512094
|
||||
conv2d fprop_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1599291337 3982786366 1581171257 1188352423
|
||||
conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2237070215 3046262465 1926804094 1435916873
|
||||
conv2d fprop_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 721666814 2012769306 1712378956 1388990183
|
||||
conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1596349869 3775131163 355203300 1126174452
|
||||
conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1380587417 1208642645 2886387159 3113955983
|
||||
conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1332573203 1417735573 1422796372 3309229181
|
||||
conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2714027800 2106992819 1196036582 2095126659
|
||||
conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1105097447 1992731268 2198911423 3378137735
|
||||
conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1105097447 1992731268 2198911423 3868431311
|
||||
conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2552471160 2218470296 2332616929 923645661
|
||||
conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2231354584 4035702005 3839068434 8981294
|
||||
conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 4019719318 3985307916 3604065639 277096636
|
||||
conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 bf16nhwc_bf16nhwc_fnhwc_f_f 258381429 3482776077 2663631601 593179089
|
||||
conv2d dgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1613915345 188810648 1623218578 2585892217
|
||||
conv2d dgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1613915345 691990354 3253144559 2988350639
|
||||
conv2d dgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2788041828 1670375523 2425320272 2553108650
|
||||
conv2d dgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1049321188 1865889553 3610888033 1459693945
|
||||
conv2d dgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 3820648800 3236781482 1382111427 1986396315
|
||||
conv2d dgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 463742721 2524037630 3070473696 210045128
|
||||
conv2d dgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 738614177 4071452982 3401957738 2920893800
|
||||
conv2d dgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2479111539 2662555669 781892324 2338234282
|
||||
conv2d dgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2089076160 260434096 1539389419 1219120658
|
||||
conv2d dgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 14838294 3344412669 2885305868 1926445693
|
||||
conv2d dgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 14838294 3344412669 2885305868 1478058549
|
||||
conv2d dgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 3945616248 4118489020 2885143346 1545684873
|
||||
conv2d dgrad_1x56x56x8_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 295760528 1685244361 3337423971 772814550
|
||||
conv2d wgrad_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 623727338 942771643 2634710231 3063349371
|
||||
conv2d wgrad_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1613915345 188810648 2709881923 3532383400
|
||||
conv2d wgrad_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2788041828 3762161398 3733128758 3693097785
|
||||
conv2d wgrad_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 139944998 3812563855 253288229 1359907535
|
||||
conv2d wgrad_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 492562992 3677108443 525487530 445191233
|
||||
conv2d wgrad_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 594197095 3773864559 91136873 4170763393
|
||||
conv2d wgrad_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 3820648800 1025574686 1127709182 677727764
|
||||
conv2d wgrad_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1901075489 3296829308 2591894666 2932517926
|
||||
conv2d wgrad_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1613915345 4223561525 1263618595 50680160
|
||||
conv2d wgrad_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1613915345 4223561525 1756414462 3209752057
|
||||
conv2d wgrad_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2788041828 1023542180 121940906 624551470
|
||||
conv2d wgrad_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1049321188 296097075 1423016429 1058165639
|
||||
conv2d wgrad_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 3820648800 4160685370 2761559427 1788182893
|
||||
conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1859384988 222880684 1650970502 1632078530
|
||||
conv2d wgrad_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 1704522433 2403392926 3985958544 1432584676
|
||||
conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 463742721 3455033786 385631111 1683348880
|
||||
conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 738614177 3199562330 1513955316 2131256035
|
||||
conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2479111539 2702777753 2608107448 4014212857
|
||||
conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2089076160 4042009058 106232038 1140762595
|
||||
conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 14838294 2260768172 1186911503 3194129408
|
||||
conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 14838294 2260768172 1186911503 1312312812
|
||||
conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 3945616248 2287161276 36034283 4262860382
|
||||
conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 2906914535 476297538 14375779 1340176713
|
||||
conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 bf16nhwc_bf16nhwc_fnhwc_f_f 4292101959 3378414564 4259930640 1392755176
|
||||
conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 bf16nhwc_bf16nhwc_fnhwc_f_f 3529371817 368260304 4137156526 122558013
|
||||
conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2948718568 2631391783 3260825675 4278587299
|
||||
conv2d fprop_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 1635109696 2835574424 4179385325 2803281440
|
||||
conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3344954627 1649157278 2032056735 1176638626
|
||||
conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 61750237 3452849177 1697665310 3475459781
|
||||
conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 1394759191 1571308277 898534533 4125341936
|
||||
conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3402206912 2433594404 1575577431 4106154211
|
||||
conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 98638790 2735493952 346473870 1911666301
|
||||
conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 98638790 2735493952 346473870 2124440208
|
||||
conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2934485636 3286257323 541566528 1113783492
|
||||
conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 164942943 4259285988 1250700182 508419908
|
||||
conv2d fprop_1x1x1x64_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 3805460372 2607401558 3465030781 210641751
|
||||
conv2d fprop_1x1x8x64_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 4200926784 1001915027 387475271 3360115596
|
||||
conv2d fprop_1x7x8x64_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 331078659 469730619 2547196469 1620698703
|
||||
conv2d fprop_1x7x9x64_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 431968022 1614654085 903827412 1349891842
|
||||
conv2d fprop_2x7x9x64_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 3674369485 1055554271 3217013807 1356703347
|
||||
conv2d fprop_3x7x9x64_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 2609462247 3227824772 365527403 2720889763
|
||||
conv2d fprop_3x7x9x64_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 2609462247 2150996976 2899308770 2371758816
|
||||
conv2d fprop_3x7x9x64_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 2609462247 2124373651 2711906981 3194739760
|
||||
conv2d fprop_1x11x7x64_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 1070162100 2750964634 3090791018 3481982191
|
||||
conv2d fprop_1x11x7x64_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 1070162100 1563941622 767747438 3163252390
|
||||
conv2d fprop_1x13x11x64_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 884815233 3576251756 3216742798 3534462723
|
||||
conv2d fprop_1x17x19x64_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 3230717758 3192193994 1161445944 371179683
|
||||
conv2d fprop_1x23x5x64_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 2450454245 2905280248 910194866 839083662
|
||||
conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 2948718568 2631391783 638794727 4292051282
|
||||
conv2d fprop_1x16x24x128_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 1635109696 2835574424 1855687620 130932480
|
||||
conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 3344954627 1649157278 4191418350 958044197
|
||||
conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 61750237 3452849177 3260472389 771128506
|
||||
conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 1394759191 1571308277 4279538191 956191103
|
||||
conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 3402206912 2433594404 2021112123 2983097553
|
||||
conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 98638790 2735493952 3178839372 568554158
|
||||
conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 98638790 2735493952 3178839372 18194802
|
||||
conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 2934485636 3286257323 2559221535 2310182528
|
||||
conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 164942943 4259285988 984016853 888753301
|
||||
conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_i 2823094147 1681845497 4242738907 3244428635
|
||||
conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 s8nhwc_s8nhwc_inhwc_i_i 4060010502 2881035321 3927119619 3311661122
tools/library/scripts/pycutlass/test/unit/test_sm80.py (new file, 440 lines)
@ -0,0 +1,440 @@
|
||||
#################################################################################################
|
||||
#
|
||||
# Copyright (c) 2017 - 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#################################################################################################
|
||||
|
||||
## Test case generator for SM80
|
||||
|
||||
import pycutlass
|
||||
from pycutlass import *
|
||||
from pycutlass.test import *
|
||||
import unittest
|
||||
|
||||
#
|
||||
# Create GEMM operation
|
||||
#
|
||||
|
||||
def TestGemmOperator(gemm_kind, math_inst, layout, alignment, tiling, arch, mixed=False,
|
||||
epilogue_functor = EpilogueFunctor.LinearCombination,
|
||||
swizzling_functor=cutlass.IdentitySwizzle1, **kwargs):
|
||||
"""
|
||||
Test GEMM Operation based on configuration
|
||||
"""
|
||||
|
||||
if "data_type" in kwargs.keys():
|
||||
data_type = kwargs["data_type"]
|
||||
else:
|
||||
if mixed or math_inst.element_a == cutlass.bfloat16:
|
||||
data_type = [
|
||||
math_inst.element_a,
|
||||
math_inst.element_b,
|
||||
math_inst.element_accumulator,
|
||||
math_inst.element_accumulator
|
||||
]
|
||||
else:
|
||||
data_type = [
|
||||
math_inst.element_a,
|
||||
math_inst.element_b,
|
||||
math_inst.element_a,
|
||||
math_inst.element_accumulator
|
||||
]
|
||||
|
||||
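    # tiling = (threadblock_shape, stage_count, warp_count)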
    tile_description = TileDescription(
        tiling[0], tiling[1], tiling[2],
        math_inst, arch, arch
    )

    A = TensorDescription(
        data_type[0], layout[0], alignment[0]
    )

    B = TensorDescription(
        data_type[1], layout[1], alignment[1]
    )

    C = TensorDescription(
        data_type[2], layout[2], alignment[2]
    )

    element_epilogue = data_type[3]

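    # Universal GEMMs run through test_all_gemm (with a dedicated mode for interleaved
    # layouts); grouped GEMMs run through the grouped testbed with 24 problem sizes.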
    if gemm_kind == GemmKind.Universal:
        operation = GemmOperationUniversal(
            arch=arch, tile_description=tile_description,
            A=A, B=B, C=C, element_epilogue=element_epilogue,
            epilogue_functor=epilogue_functor, swizzling_functor=swizzling_functor
        )
        if A.layout in [cutlass.ColumnMajorInterleaved32, cutlass.RowMajorInterleaved32]:
            return test_all_gemm(operation, "interleaved")
        else:
            return test_all_gemm(operation, "universal")

    elif gemm_kind == GemmKind.Grouped:
        operation = GemmOperationGrouped(
            arch, tile_description, A, B, C,
            element_epilogue, epilogue_functor, swizzling_functor,
            precompute_mode=kwargs["precompute_mode"]
        )
        testbed = TestbedGrouped(operation=operation)
        return testbed.run(24)
    else:
        raise NotImplementedError("The requested GEMM kind is not implemented")


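# Illustrative usage only (the MathInstruction bound to `math_inst` is hypothetical):
# a single universal GEMM configuration can be exercised directly, mirroring the calls
# made in Test_SM80 below, e.g.
#   TestGemmOperator(GemmKind.Universal, math_inst,
#                    (cutlass.RowMajor, cutlass.RowMajor, cutlass.RowMajor),
#                    (8, 8, 8), ([256, 128, 32], 3, [4, 2, 1]), 80)
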
def TestConv2dOperator(math_inst, alignment, tiling, arch,
                       stride_supports=[StrideSupport.Strided, StrideSupport.Strided, StrideSupport.Strided],
                       epilogue_functor=EpilogueFunctor.LinearCombination,
                       swizzling_functor=cutlass.IdentitySwizzle1, interleaved=False, **kwargs):
    """
    Test Conv2d operations built from the given configuration
    """

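    # Each position below corresponds to one conv kind: fprop, dgrad, wgrad.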
    mixeds = [False, True, False]
    conv_kinds = [cutlass.conv.Operator.fprop, cutlass.conv.Operator.dgrad, cutlass.conv.Operator.wgrad]

    results = []

    default_swizzling_functor = swizzling_functor

    if "layout" in kwargs.keys():
        layout = kwargs["layout"]
    else:
        layout = (cutlass.TensorNHWC, cutlass.TensorNHWC, cutlass.TensorNHWC)

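    # Run one fprop, one dgrad, and one wgrad test, pairing each conv kind with its
    # mixed-precision flag and its stride support mode.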
    for mixed, conv_kind, stride_support in zip(mixeds, conv_kinds, stride_supports):

        if "data_type" in kwargs.keys():
            data_type = kwargs["data_type"]
        else:
            if mixed or math_inst.element_a == cutlass.bfloat16:
                data_type = [
                    math_inst.element_a,
                    math_inst.element_b,
                    math_inst.element_accumulator,
                    math_inst.element_accumulator
                ]
            else:
                data_type = [
                    math_inst.element_a,
                    math_inst.element_b,
                    math_inst.element_a,
                    math_inst.element_accumulator
                ]
        # Skip int8 conv backward passes (dgrad and wgrad)
        if data_type[0] == cutlass.int8 and conv_kind in [cutlass.conv.Operator.dgrad, cutlass.conv.Operator.wgrad]:
            continue

        A = TensorDescription(
            element=data_type[0],
            layout=layout[0],
            alignment=alignment[0])
        B = TensorDescription(
            element=data_type[1],
            layout=layout[1],
            alignment=alignment[1])
        C = TensorDescription(
            element=data_type[2],
            layout=layout[2],
            alignment=alignment[2])

        tile_description = TileDescription(
            threadblock_shape=tiling[0], stages=tiling[1],
            warp_count=tiling[2],
            math_instruction=math_inst,
            min_compute=arch, max_compute=arch
        )

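        # Strided dgrad needs its dedicated swizzling functor; every other case uses the
        # functor supplied by the caller.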
        if conv_kind == cutlass.conv.Operator.dgrad and stride_support == StrideSupport.Strided:
            swizzling_functor = cutlass.StridedDgradIdentitySwizzle1
        else:
            swizzling_functor = default_swizzling_functor

        operation = Conv2dOperation(
            conv_kind=conv_kind, iterator_algorithm=cutlass.conv.IteratorAlgorithm.optimized,
            arch=arch, tile_description=tile_description, A=A, B=B, C=C,
            element_epilogue=data_type[3], stride_support=stride_support,
            epilogue_functor=epilogue_functor,
            swizzling_functor=swizzling_functor
        )

        results.append(test_all_conv2d(operation, interleaved=interleaved))

    return results


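# Each test case below builds one or more SM80 kernel configurations and runs them through
# the pycutlass.test correctness testbeds (test_all_gemm, test_all_conv2d, TestbedGrouped).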
class Test_SM80(unittest.TestCase):
    def test_SM80_TensorOp_16816(self):
        math_instructions = [
            MathInstruction(
                [16, 8, 16], cutlass.float16, cutlass.float16, cutlass.float32,
                cutlass.OpClass.TensorOp, MathOperation.multiply_add
            ),
            MathInstruction(
                [16, 8, 16], cutlass.float16, cutlass.float16, cutlass.float16,
                cutlass.OpClass.TensorOp, MathOperation.multiply_add
            ),
            MathInstruction(
                [16, 8, 16], cutlass.bfloat16, cutlass.bfloat16, cutlass.float32,
                cutlass.OpClass.TensorOp, MathOperation.multiply_add
            )
        ]

        layouts = [
            (cutlass.RowMajor, cutlass.RowMajor, cutlass.RowMajor),
            (cutlass.ColumnMajor, cutlass.RowMajor, cutlass.RowMajor),
            (cutlass.RowMajor, cutlass.ColumnMajor, cutlass.RowMajor)
        ]

        alignments = [
            (8, 8, 8), (4, 8, 8), (8, 4, 8)
        ]

        tilings = [
            ([256, 128, 32], 3, [4, 2, 1]),
            ([64, 256, 32], 4, [1, 4, 1]),
            ([128, 64, 64], 3, [2, 2, 1])
        ]

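        # zip pairs each math instruction with one layout, alignment, and tiling; every
        # triple is exercised as a universal GEMM, a grouped GEMM, and a set of Conv2d runs.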
        for math_inst, layout, alignment, tiling in zip(math_instructions, layouts, alignments, tilings):
            self.assertTrue(TestGemmOperator(GemmKind.Universal, math_inst, layout, alignment, tiling, 80, False))
            self.assertTrue(TestGemmOperator(GemmKind.Grouped, math_inst, layout, alignment, tiling, 80, True, precompute_mode=SchedulerMode.Host))
            stride_supports = [StrideSupport.Strided, StrideSupport.Strided, StrideSupport.Strided]
            results = TestConv2dOperator(math_inst, alignment, tiling, 80, stride_supports=stride_supports)
            for res in results:
                self.assertTrue(res)

    def test_SM80_TensorOp_1688(self):
        # tf32 is not supported by most Python environments; skip the test.
        self.assertTrue(True)

    def test_SM80_TensorOp_1688_fast_math(self):
        math_instructions = [
            MathInstruction(
                [16, 8, 8], cutlass.tfloat32, cutlass.tfloat32, cutlass.float32,
                cutlass.OpClass.TensorOp, MathOperation.multiply_add
            ),
            MathInstruction(
                [16, 8, 8], cutlass.float16, cutlass.float16, cutlass.float32,
                cutlass.OpClass.TensorOp, MathOperation.multiply_add_fast_f16
            ),
            MathInstruction(
                [16, 8, 8], cutlass.bfloat16, cutlass.bfloat16, cutlass.float32,
                cutlass.OpClass.TensorOp, MathOperation.multiply_add_fast_bf16
            ),
            MathInstruction(
                [16, 8, 8], cutlass.float32, cutlass.float32, cutlass.float32,
                cutlass.OpClass.TensorOp, MathOperation.multiply_add_fast_f32
            )
        ]

        layouts = [
            (cutlass.RowMajor, cutlass.RowMajor, cutlass.ColumnMajor),
            (cutlass.RowMajor, cutlass.ColumnMajor, cutlass.ColumnMajor),
            (cutlass.ColumnMajor, cutlass.RowMajor, cutlass.ColumnMajor),
            (cutlass.ColumnMajor, cutlass.ColumnMajor, cutlass.RowMajor)
        ]
        alignments = [
            (4, 4, 4), (4, 2, 4), (2, 4, 4), (2, 2, 4)
        ]
        tilings = [
            ([128, 256, 16], 3, [4, 2, 1]),
            ([64, 256, 16], 4, [1, 4, 1]),
            ([128, 64, 32], 3, [2, 2, 1]),
            ([256, 64, 32], 3, [4, 2, 1])
        ]
        data_type = [
            cutlass.float32, cutlass.float32, cutlass.float32, cutlass.float32
        ]
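        # Every fast-math variant accumulates in fp32 and is driven with fp32 inputs and
        # outputs via the explicit data_type list above.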
        for math_inst, layout, alignment, tiling in zip(math_instructions, layouts, alignments, tilings):
            self.assertTrue(
                TestGemmOperator(
                    GemmKind.Universal, math_inst, layout,
                    alignment, tiling, 80, False, data_type=data_type))
            self.assertTrue(
                TestGemmOperator(
                    GemmKind.Grouped, math_inst, layout, alignment, tiling, 80,
                    True, precompute_mode=SchedulerMode.Device, data_type=data_type))
            stride_supports = [StrideSupport.Unity, StrideSupport.Strided, StrideSupport.Unity]
            results = TestConv2dOperator(math_inst, alignment, tiling, 80, stride_supports=stride_supports, data_type=data_type)
            for res in results:
                self.assertTrue(res)

    def test_SM80_TensorOp_884(self):
        math_inst = MathInstruction(
            [8, 8, 4], cutlass.float64, cutlass.float64, cutlass.float64,
            cutlass.OpClass.TensorOp, MathOperation.multiply_add
        )
        layout = (cutlass.ColumnMajor, cutlass.ColumnMajor, cutlass.ColumnMajor)
        alignment = (1, 1, 1)

        tiling = ([64, 256, 16], 3, [2, 4, 1])
        data_type = [cutlass.float64, cutlass.float64, cutlass.float64, cutlass.float64]
        self.assertTrue(TestGemmOperator(GemmKind.Universal, math_inst, layout, alignment, tiling, 80, False, data_type=data_type))
        self.assertTrue(TestGemmOperator(GemmKind.Grouped, math_inst, layout, alignment, tiling, 80, True, precompute_mode=SchedulerMode.Device, data_type=data_type))
        stride_supports = [StrideSupport.Unity, StrideSupport.Strided, StrideSupport.Unity]
        results = TestConv2dOperator(math_inst, alignment, tiling, 80, stride_supports=stride_supports, data_type=data_type)
        for res in results:
            self.assertTrue(res)

    def test_SM80_TensorOp_16832_TN(self):
        math_inst = MathInstruction(
            [16, 8, 32], cutlass.int8, cutlass.int8, cutlass.int32,
            cutlass.OpClass.TensorOp, MathOperation.multiply_add_saturate
        )
        layout = (cutlass.RowMajor, cutlass.ColumnMajor, cutlass.ColumnMajor)
        alignment = (16, 16, 4)
        alignment_mixed = (16, 16, 16)
        tiling = ([128, 256, 64], 3, [2, 4, 1])

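        # The grouped GEMM run stores int8 outputs (alignment 16) with an fp32 epilogue;
        # the universal GEMM and the conv tests keep int32 outputs.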
        data_type = [cutlass.int8, cutlass.int8, cutlass.int32, cutlass.int32]
        data_type_mixed = [cutlass.int8, cutlass.int8, cutlass.int8, cutlass.float32]

        self.assertTrue(TestGemmOperator(GemmKind.Universal, math_inst, layout, alignment, tiling, 80, False, data_type=data_type))
        self.assertTrue(TestGemmOperator(GemmKind.Grouped, math_inst, layout, alignment_mixed, tiling, 80, True, precompute_mode=SchedulerMode.Device, data_type=data_type_mixed))
        stride_supports = [StrideSupport.Strided, StrideSupport.Strided, StrideSupport.Strided]
        results = TestConv2dOperator(math_inst, alignment, tiling, 80, stride_supports=stride_supports, data_type=data_type)
        for res in results:
            self.assertTrue(res)

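    # The SIMT tests below use a 1x1x1 math instruction shape and alignment-1 operands.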
    def test_SM80_Simt_f32(self):
        math_inst = MathInstruction(
            [1, 1, 1], cutlass.float32, cutlass.float32, cutlass.float32,
            cutlass.OpClass.Simt, MathOperation.multiply_add
        )
        layout = (cutlass.RowMajor, cutlass.RowMajor, cutlass.RowMajor)
        alignment = (1, 1, 1)

        tiling = ([128, 256, 8], 4, [2, 4, 1])
        data_type = [cutlass.float32, cutlass.float32, cutlass.float32, cutlass.float32]
        self.assertTrue(TestGemmOperator(GemmKind.Universal, math_inst, layout, alignment, tiling, 80, False, data_type=data_type))
        self.assertTrue(TestGemmOperator(GemmKind.Grouped, math_inst, layout, alignment, tiling, 80, True, precompute_mode=SchedulerMode.Host, data_type=data_type))
        stride_supports = [StrideSupport.Strided, StrideSupport.Strided, StrideSupport.Strided]
        results = TestConv2dOperator(math_inst, alignment, tiling, 80, stride_supports=stride_supports, data_type=data_type)
        for res in results:
            self.assertTrue(res)

    def test_SM80_Simt_f64(self):
        math_inst = MathInstruction(
            [1, 1, 1], cutlass.float64, cutlass.float64, cutlass.float64,
            cutlass.OpClass.Simt, MathOperation.multiply_add
        )
        layout = (cutlass.RowMajor, cutlass.RowMajor, cutlass.ColumnMajor)
        alignment = (1, 1, 1)

        tiling = ([64, 128, 8], 5, [2, 2, 1])
        data_type = [cutlass.float64, cutlass.float64, cutlass.float64, cutlass.float64]
        self.assertTrue(TestGemmOperator(GemmKind.Universal, math_inst, layout, alignment, tiling, 80, False, data_type=data_type))
        self.assertTrue(TestGemmOperator(GemmKind.Grouped, math_inst, layout, alignment, tiling, 80, True, precompute_mode=SchedulerMode.Device, data_type=data_type))
        stride_supports = [StrideSupport.Strided, StrideSupport.Strided, StrideSupport.Strided]
        results = TestConv2dOperator(math_inst, alignment, tiling, 80, stride_supports=stride_supports, data_type=data_type)
        for res in results:
            self.assertTrue(res)

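    # The interleaved int8 test switches the conv path to NC32HW32/C32RSK32 tensor layouts
    # and the GEMM path to the FastLinearCombinationClamp epilogue.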
    def test_SM80_TensorOp_16832_Interleaved(self):
        math_inst = MathInstruction(
            [16, 8, 32], cutlass.int8, cutlass.int8, cutlass.int32,
            cutlass.OpClass.TensorOp, MathOperation.multiply_add_saturate
        )

        layout = (cutlass.ColumnMajorInterleaved32, cutlass.RowMajorInterleaved32, cutlass.ColumnMajorInterleaved32)
        alignment_mixed = (16, 16, 8)
        tiling = ([256, 64, 64], 4, [4, 1, 1])
        data_type_mixed = [cutlass.int8, cutlass.int8, cutlass.int8, cutlass.float32]

        self.assertTrue(TestGemmOperator(GemmKind.Universal, math_inst, layout, alignment_mixed, tiling, 80, False, data_type=data_type_mixed, epilogue_functor=EpilogueFunctor.FastLinearCombinationClamp))
        stride_supports = [StrideSupport.Strided, StrideSupport.Strided, StrideSupport.Strided]
        layout = [cutlass.TensorNC32HW32, cutlass.TensorC32RSK32, cutlass.TensorNC32HW32]
        results = TestConv2dOperator(math_inst, alignment_mixed, tiling, 80, stride_supports=stride_supports, data_type=data_type_mixed, layout=layout, interleaved=True)
        for res in results:
            self.assertTrue(res)

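    # The remaining entries are placeholders for configurations that are not exercised here.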
    def SM80_SparseTensorOp_16832(self):
        pass
    def test_SM80_PlanarComplexTensorOp_16816(self):
        pass
    def test_SM80_SparseTensorOp_16816_fast_math(self):
        pass
    def test_SM80_TensorOp_1688_complex(self):
        pass
    def test_SM80_TensorOp_1688_fast_fp32_math_complex(self):
        pass
    def test_SM80_TensorOp_1688_rank_k(self):
        pass
    def test_SM80_TensorOp_1688_rank_k_complex(self):
        pass
    def test_SM80_TensorOp_1688_trmm(self):
        pass
    def test_SM80_TensorOp_1688_trmm_complex(self):
        pass
    def test_SM80_TensorOp_1688_symm(self):
        pass
    def test_SM80_TensorOp_1688_symm_complex(self):
        pass
    def test_SM80_TensorOp_884_complex(self):
        pass
    def test_SM80_TensorOp_884_complex_gaussian(self):
        pass
    def test_SM80_TensorOp_884_rank_k(self):
        pass
    def test_SM80_TensorOp_884_rank_k_complex(self):
        pass
    def test_SM80_TensorOp_884_rank_k_complex_gaussian(self):
        pass
    def test_SM80_TensorOp_884_trmm(self):
        pass
    def test_SM80_TensorOp_884_trmm_complex(self):
        pass
    def test_SM80_TensorOp_884_trmm_complex_gaussian(self):
        pass
    def test_SM80_TensorOp_884_symm(self):
        pass
    def test_SM80_TensorOp_884_symm_complex(self):
        pass
    def test_SM80_TensorOp_884_symm_complex_gaussian(self):
        pass
    def test_SM80_SparseTensorOp_16864_TN(self):
        pass
    def test_SM80_TensorOp_16864_TN(self):
        pass
    def test_SM80_SparseTensorOp_168128_TN(self):
        pass
    def test_SM80_TensorOp_16864_Interleaved(self):
        pass
    def test_SM80_TensorOp_168256(self):
        pass
    def test_SM80_Simt_complex(self):
        pass


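# Presumably the two arguments set the initial and maximum device memory pool sizes in
# bytes; nvcc is selected as the compiler backend before the suite runs.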
if __name__ == '__main__':
    pycutlass.get_memory_pool(2**20, 2**34)
    pycutlass.compiler.nvcc()
    unittest.main()