正在加载图片...
// A Work-Inefficient Scan Kernel
//
// Computes an inclusive prefix sum of X into Y, independently for every
// thread block: the thread with index t inside a block ends up with the sum
// of that block's inputs at positions 0..t. Nothing is carried from one block
// to the next, so a multi-block launch yields per-block (sectioned) scans.
//
// It is "work-inefficient" because every doubling step lets almost every
// thread perform an addition, giving on the order of n*log2(n) additions per
// block instead of the n-1 a sequential scan needs.
//
// Launch requirements:
//   - 1-D grid of 1-D blocks, one thread per input element.
//   - blockDim.x <= SECTION_SIZE, because XY holds one float per thread.
//   - SECTION_SIZE is a compile-time constant that must be defined before
//     this kernel; it is not defined in this snippet.
//
// Parameters:
//   X         - input array with at least InputSize elements
//   Y         - output array with at least InputSize elements
//   InputSize - number of valid elements in X and Y
__global__ void work_inefficient_scan_kernel(float *X, float *Y, int InputSize) {
    __shared__ float XY[SECTION_SIZE];

    int i = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads past the end of the input load the additive identity so that no
    // shared-memory slot is read while still uninitialized.
    XY[threadIdx.x] = (i < InputSize) ? X[i] : 0.0f;

    // Iterative scan on XY. The loop bound depends only on blockDim.x, so all
    // threads of the block execute the same number of iterations and every
    // __syncthreads() below is reached by the whole block. Only the read and
    // the add are guarded per thread.
    for (unsigned int stride = 1; stride < blockDim.x; stride *= 2) {
        // Make the previous step's writes (or the initial load) visible.
        __syncthreads();

        float in1 = 0.0f;
        if (threadIdx.x >= stride) {
            in1 = XY[threadIdx.x - stride];
        }

        // Every thread must finish reading before any thread overwrites XY.
        __syncthreads();

        if (threadIdx.x >= stride) {
            XY[threadIdx.x] += in1;
        }
    }

    // No barrier is needed here: each thread reads back only the slot that it
    // alone writes.
    if (i < InputSize) {
        Y[i] = XY[threadIdx.x];
    }
}
<< 向上翻页 | 向下翻页 >>
©2008-现在 cucdc.com 高等教育资讯网 版权所有