我在边界寻址模式下使用CUDA纹理( cudaAddressModeBorder )。 我正在使用tex2D<float>()读取纹理坐标。 当纹理坐标落在纹理之外时, tex2D<float>()返回0 。
我怎样才能将这个返回的边界值从0改为别的? 我可以手动检查纹理坐标并自己设置边界值。 我想知道是否有CUDA API可以设置这样的边界值。
I am using a CUDA texture in border addressing mode (cudaAddressModeBorder). I am reading texture coordinates using tex2D<float>(). When the texture coordinates fall outside the texture, tex2D<float>() returns 0.
How can I change this returned border value from 0 to something else? I could check the texture coordinate manually and set the border value myself. I was wondering if there was CUDA API where I can set such a border value.
最满意答案
正如sgarizvi所提到的,CUDA仅支持四种非可定制的地址模式,即钳位 , 边界 , 换行和镜像 ,这些在3.2.11.1节中有描述。 的CUDA编程指南。
前两者在非标准化和标准化坐标中工作,而后两者仅在标准化坐标中工作。
为了描述前两个,为了简单起见,让我们考虑未标准化的坐标情况并考虑1D信号。 在这种情况下,输入序列是c[k] ,其中k=0,...,M-1 。
cudaAddressModeClamp
信号c[k]在k=0,...,M-1之外延续k=0,...,M-1使得对于k < 0 , c[k] = c[0]对于k < 0 , c[k] = c[M-1] k >= M
cudaAddressModeBorder
信号c[k]在k=0,...,M-1之外延续k=0,...,M-1使得对于k < 0且对于k >= M , c[k] = 0 。
现在,为了描述最后两种地址模式,我们不得不考虑归一化的坐标,因此假定一维输入信号采样为c[k / M] ,其中k=0,...,M-1 。
cudaAddressModeWrap
信号c[k / M]在k=0,...,M-1之外延续k=0,...,M-1使得周期等于M 换句话说,任何(正,负或消失)整数p c[(k + p * M) / M] = c[k / M] 。
cudaAddressModeMirror
信号c[k / M]在k=0,...,M-1之外继续k=0,...,M-1因此它周期等于2 * M - 2 。 换句话说,对于任何l和k使得(l + k)mod(2 * M - 2) = 0 c[l / M] = c[k / M] 。
以下代码说明了所有四种可用的地址模式
#include <stdio.h> texture<float, 1, cudaReadModeElementType> texture_clamp; texture<float, 1, cudaReadModeElementType> texture_border; texture<float, 1, cudaReadModeElementType> texture_wrap; texture<float, 1, cudaReadModeElementType> texture_mirror; /********************/ /* CUDA ERROR CHECK */ /********************/ #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) { if (code != cudaSuccess) { fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); if (abort) exit(code); } } /******************************/ /* CUDA ADDRESS MODE CLAMPING */ /******************************/ __global__ void Test_texture_clamping(const int M) { printf("Texture clamping - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_clamp, -(float)threadIdx.x)); printf("Texture clamping - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_clamp, (float)(M + threadIdx.x))); } /****************************/ /* CUDA ADDRESS MODE BORDER */ /****************************/ __global__ void Test_texture_border(const int M) { printf("Texture border - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_border, -(float)threadIdx.x)); printf("Texture border - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_border, (float)(M + threadIdx.x))); } /**************************/ /* CUDA ADDRESS MODE WRAP */ /**************************/ __global__ void Test_texture_wrap(const int M) { printf("Texture wrap - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_wrap, -(float)threadIdx.x/(float)M)); printf("Texture wrap - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_wrap, (float)(M + threadIdx.x)/(float)M)); } /****************************/ /* CUDA ADDRESS MODE MIRROR */ /****************************/ __global__ void Test_texture_mirror(const int M) { printf("Texture mirror - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_mirror, -(float)threadIdx.x/(float)M)); printf("Texture mirror - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_mirror, (float)(M + threadIdx.x)/(float)M)); } /********/ /* MAIN */ /********/ void main(){ const int M = 4; // --- Host side memory allocation and initialization float *h_data = (float*)malloc(M * sizeof(float)); for (int i=0; i<M; i++) h_data[i] = (float)i; // --- Texture clamping cudaArray* d_data_clamping = NULL; gpuErrchk(cudaMallocArray(&d_data_clamping, &texture_clamp.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_clamping, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_clamp, d_data_clamping); texture_clamp.normalized = false; texture_clamp.addressMode[0] = cudaAddressModeClamp; dim3 dimBlock(2 * M, 1); dim3 dimGrid(1, 1); Test_texture_clamping<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture border cudaArray* d_data_border = NULL; gpuErrchk(cudaMallocArray(&d_data_border, &texture_border.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_border, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_border, d_data_border); texture_border.normalized = false; texture_border.addressMode[0] = cudaAddressModeBorder; Test_texture_border<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture wrap cudaArray* d_data_wrap = NULL; gpuErrchk(cudaMallocArray(&d_data_wrap, &texture_wrap.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_wrap, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_wrap, d_data_wrap); texture_wrap.normalized = true; texture_wrap.addressMode[0] = cudaAddressModeWrap; Test_texture_wrap<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture mirror cudaArray* d_data_mirror = NULL; gpuErrchk(cudaMallocArray(&d_data_mirror, &texture_mirror.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_mirror, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_mirror, d_data_mirror); texture_mirror.normalized = true ; texture_mirror.addressMode[0] = cudaAddressModeMirror; Test_texture_mirror<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); }这些是输出
index -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 clamp 0 0 0 0 0 0 0 0 1 2 3 3 3 3 3 3 3 3 3 border 0 0 0 0 0 0 0 0 1 2 3 0 0 0 0 0 0 0 0 wrap 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 mirror 1 2 3 3 2 1 0 0 1 2 3 3 2 1 0 0 1 2 3As mentioned by sgarizvi, CUDA supports only four, non-customizable address modes, namely, clamp, border, wrap and mirror, which are described in Section 3.2.11.1. of the CUDA programming guide.
The former two work in both unnormalized and normalized coordinates, while the latter two in normalized coordinates only.
To describe the first two, let us consider the unnormalized coordinates case and consider 1D signals, for the sake of simplicity. In this case, the input sequence is c[k], with k=0,...,M-1.
cudaAddressModeClamp
The signal c[k] is continued outside k=0,...,M-1 so that c[k] = c[0] for k < 0, and c[k] = c[M-1] for k >= M.
cudaAddressModeBorder
The signal c[k] is continued outside k=0,...,M-1 so that c[k] = 0 for k < 0and for k >= M.
Now, to describe the last two address modes, we are forced to consider normalized coordinates, so that the 1D input signal samples are assumed to be c[k / M], with k=0,...,M-1.
cudaAddressModeWrap
The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to M. In other words, c[(k + p * M) / M] = c[k / M] for any (positive, negative or vanishing) integer p.
cudaAddressModeMirror
The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to 2 * M - 2. In other words, c[l / M] = c[k / M] for any l and k such that (l + k)mod(2 * M - 2) = 0.
The following code illustrates all the four available address modes
#include <stdio.h> texture<float, 1, cudaReadModeElementType> texture_clamp; texture<float, 1, cudaReadModeElementType> texture_border; texture<float, 1, cudaReadModeElementType> texture_wrap; texture<float, 1, cudaReadModeElementType> texture_mirror; /********************/ /* CUDA ERROR CHECK */ /********************/ #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) { if (code != cudaSuccess) { fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); if (abort) exit(code); } } /******************************/ /* CUDA ADDRESS MODE CLAMPING */ /******************************/ __global__ void Test_texture_clamping(const int M) { printf("Texture clamping - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_clamp, -(float)threadIdx.x)); printf("Texture clamping - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_clamp, (float)(M + threadIdx.x))); } /****************************/ /* CUDA ADDRESS MODE BORDER */ /****************************/ __global__ void Test_texture_border(const int M) { printf("Texture border - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_border, -(float)threadIdx.x)); printf("Texture border - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_border, (float)(M + threadIdx.x))); } /**************************/ /* CUDA ADDRESS MODE WRAP */ /**************************/ __global__ void Test_texture_wrap(const int M) { printf("Texture wrap - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_wrap, -(float)threadIdx.x/(float)M)); printf("Texture wrap - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_wrap, (float)(M + threadIdx.x)/(float)M)); } /****************************/ /* CUDA ADDRESS MODE MIRROR */ /****************************/ __global__ void Test_texture_mirror(const int M) { printf("Texture mirror - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_mirror, -(float)threadIdx.x/(float)M)); printf("Texture mirror - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_mirror, (float)(M + threadIdx.x)/(float)M)); } /********/ /* MAIN */ /********/ void main(){ const int M = 4; // --- Host side memory allocation and initialization float *h_data = (float*)malloc(M * sizeof(float)); for (int i=0; i<M; i++) h_data[i] = (float)i; // --- Texture clamping cudaArray* d_data_clamping = NULL; gpuErrchk(cudaMallocArray(&d_data_clamping, &texture_clamp.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_clamping, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_clamp, d_data_clamping); texture_clamp.normalized = false; texture_clamp.addressMode[0] = cudaAddressModeClamp; dim3 dimBlock(2 * M, 1); dim3 dimGrid(1, 1); Test_texture_clamping<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture border cudaArray* d_data_border = NULL; gpuErrchk(cudaMallocArray(&d_data_border, &texture_border.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_border, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_border, d_data_border); texture_border.normalized = false; texture_border.addressMode[0] = cudaAddressModeBorder; Test_texture_border<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture wrap cudaArray* d_data_wrap = NULL; gpuErrchk(cudaMallocArray(&d_data_wrap, &texture_wrap.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_wrap, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_wrap, d_data_wrap); texture_wrap.normalized = true; texture_wrap.addressMode[0] = cudaAddressModeWrap; Test_texture_wrap<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture mirror cudaArray* d_data_mirror = NULL; gpuErrchk(cudaMallocArray(&d_data_mirror, &texture_mirror.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_mirror, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_mirror, d_data_mirror); texture_mirror.normalized = true ; texture_mirror.addressMode[0] = cudaAddressModeMirror; Test_texture_mirror<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); }Those are the outputs
index -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 clamp 0 0 0 0 0 0 0 0 1 2 3 3 3 3 3 3 3 3 3 border 0 0 0 0 0 0 0 0 1 2 3 0 0 0 0 0 0 0 0 wrap 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 mirror 1 2 3 3 2 1 0 0 1 2 3 3 2 1 0 0 1 2 3更多推荐
发布评论