> 文章列表 > cuda ptx 汇编语言示例:读寄存器

cuda ptx 汇编语言示例:读寄存器

cuda ptx 汇编语言示例:读寄存器

编译(针对 Ampere 架构显卡,如 RTX 3060、RTX 3070 等,计算能力为 8.6):

nvcc -arch=sm_86 -o hello hello_ptx.cu

或写成Makefile:

# Build the demo binary from the CUDA source shown in this article.
# $@ expands to the target name (hello); $^ expands to the prerequisite list.
# The recipe line below must begin with a TAB character.
hello: hello_ptx.cu
	nvcc -arch=sm_86 -o $@ $^
# Equivalent explicit command:
# nvcc -arch=sm_86 -o hello hello_ptx.cu

$@  是指目标

$^  是指所有依赖(去除重复项后的完整列表);第一个依赖用 $< 表示。本例只有一个依赖,所以两者效果相同。

hello_ptx.cu

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Evaluate a CUDA runtime call; on failure print the error string with the
// source location and terminate the process.
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,   \
                    cudaGetErrorString(err_));                              \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

// Read the PTX special register %smid (identifier of the SM executing the
// calling thread) via inline assembly and return it.
// NOTE(review): names beginning with a double underscore are reserved for the
// implementation; kept here so existing callers are unaffected.
static __device__ __inline__ uint32_t __mysmid()
{
    uint32_t ssmid;
    asm volatile("mov.u32 %0, %%smid;" : "=r"(ssmid));
    return ssmid;
}

// Read the PTX special register %warpid (the calling thread's warp identifier)
// via inline assembly and return it.
static __device__ __inline__ uint32_t __mywarpid()
{
    uint32_t warpid;
    asm volatile("mov.u32 %0, %%warpid;" : "=r"(warpid));
    return warpid;
}

// Read the PTX special register %laneid (the calling thread's lane within its
// warp) via inline assembly and return it.
static __device__ __inline__ uint32_t __mylaneid()
{
    uint32_t laneid;
    asm volatile("mov.u32 %0, %%laneid;" : "=r"(laneid));
    return laneid;
}

// Demo kernel: thread 0 of every block with blockIdx.x <= 33 prints its global
// thread index together with the SM ID, warp ID and lane ID it observes.
// Expects a 1D grid and 1D blocks; takes no arguments and uses no shared memory.
__global__ void mykernel()
{
    int idx = threadIdx.x + blockDim.x * blockIdx.x;
    unsigned thx = threadIdx.x;

    // Alternative filters that were tried while experimenting:
    //   if (threadIdx.x == 1023)  // && blockIdx.x < 3
    //   if (threadIdx.x == 1)
    //   if ((thx == 0 || thx == 32 || thx == 64 || thx == 96 || thx == 128 ||
    //        thx == 160 || thx == 192 || thx == 224 || thx == 256) && blockIdx.x == 0)
    if (blockIdx.x <= 33 && thx == 0) {
        // The three IDs are uint32_t, hence %u; idx is a signed int, hence %d.
        printf("I am thread %d, my SM ID is %u, my warp ID is %u, and my warp lane is %u\n",
               idx, __mysmid(), __mywarpid(), __mylaneid());
    }
}

// Launch mykernel with 34 blocks of 1024 threads and wait for it to finish so
// that the device-side printf output is flushed before the program exits.
int main()
{
    dim3 grid_;
    dim3 block_;
    grid_.x = 34;
    block_.x = 1024;

    mykernel<<<grid_, block_>>>();
    CUDA_CHECK(cudaGetLastError());       // reports launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // reports errors raised while the kernel ran

    return 0;
}
// Build: nvcc -arch=sm_86 -o hello hello_ptx.cu
// (the older -arch=sm_20 target is no longer accepted by current CUDA toolkits)

运行:

结果分析:

rtx3060中,

当 block 的个数从 1 个依次增加到 2 个、3 个……时,

smid 的值依次为 0, 2, 4, …,直到最大的偶数 max_even(smid);然后是 1, 3, 5, …,直到最大的奇数 max_odd(smid)。

RTX 3060 的每个 TPC 包含 2 个 SM(2 SM/TPC)。

smid 先偶后奇的变化,跟每个 TPC 包含 2 个 SM 中的这个 2 是否有关系呢?如果有,又是什么关系呢?