CUDA纹理的不同寻址模式(The different addressing modes of CUDA textures)

编程入门 行业动态 更新时间:2024-10-21 12:00:46
CUDA纹理的不同寻址模式(The different addressing modes of CUDA textures)

我在边界寻址模式下使用CUDA纹理( cudaAddressModeBorder )。 我正在使用tex2D<float>()读取纹理坐标。 当纹理坐标落在纹理之外时, tex2D<float>()返回0 。

我怎样才能将这个返回的边界值从0改为别的? 我可以手动检查纹理坐标并自己设置边界值。 我想知道是否有CUDA API可以设置这样的边界值。

I am using a CUDA texture in border addressing mode (cudaAddressModeBorder). I am reading texture coordinates using tex2D<float>(). When the texture coordinates fall outside the texture, tex2D<float>() returns 0.

How can I change this returned border value from 0 to something else? I could check the texture coordinate manually and set the border value myself. I was wondering if there was CUDA API where I can set such a border value.

最满意答案

正如sgarizvi所提到的,CUDA仅支持四种非可定制的地址模式,即钳位边界换行镜像 ,这些在3.2.11.1节中有描述。 的CUDA编程指南。

前两者在非标准化和标准化坐标中工作,而后两者仅在标准化坐标中工作。

为了描述前两个,为了简单起见,让我们考虑未标准化的坐标情况并考虑1D信号。 在这种情况下,输入序列是c[k] ,其中k=0,...,M-1 。

cudaAddressModeClamp

信号c[k]在k=0,...,M-1之外延续k=0,...,M-1使得对于k < 0 , c[k] = c[0]对于k < 0 , c[k] = c[M-1] k >= M

cudaAddressModeBorder

信号c[k]在k=0,...,M-1之外延续k=0,...,M-1使得对于k < 0且对于k >= M , c[k] = 0 。

现在,为了描述最后两种地址模式,我们不得不考虑归一化的坐标,因此假定一维输入信号采样为c[k / M] ,其中k=0,...,M-1 。

cudaAddressModeWrap

信号c[k / M]在k=0,...,M-1之外延续k=0,...,M-1使得周期等于M 换句话说,任何(正,负或消失)整数p c[(k + p * M) / M] = c[k / M] 。

cudaAddressModeMirror

信号c[k / M]在k=0,...,M-1之外继续k=0,...,M-1因此它周期等于2 * M - 2 。 换句话说,对于任何l和k使得(l + k)mod(2 * M - 2) = 0 c[l / M] = c[k / M] 。

以下代码说明了所有四种可用的地址模式

#include <stdio.h> texture<float, 1, cudaReadModeElementType> texture_clamp; texture<float, 1, cudaReadModeElementType> texture_border; texture<float, 1, cudaReadModeElementType> texture_wrap; texture<float, 1, cudaReadModeElementType> texture_mirror; /********************/ /* CUDA ERROR CHECK */ /********************/ #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) { if (code != cudaSuccess) { fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); if (abort) exit(code); } } /******************************/ /* CUDA ADDRESS MODE CLAMPING */ /******************************/ __global__ void Test_texture_clamping(const int M) { printf("Texture clamping - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_clamp, -(float)threadIdx.x)); printf("Texture clamping - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_clamp, (float)(M + threadIdx.x))); } /****************************/ /* CUDA ADDRESS MODE BORDER */ /****************************/ __global__ void Test_texture_border(const int M) { printf("Texture border - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_border, -(float)threadIdx.x)); printf("Texture border - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_border, (float)(M + threadIdx.x))); } /**************************/ /* CUDA ADDRESS MODE WRAP */ /**************************/ __global__ void Test_texture_wrap(const int M) { printf("Texture wrap - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_wrap, -(float)threadIdx.x/(float)M)); printf("Texture wrap - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_wrap, (float)(M + threadIdx.x)/(float)M)); } /****************************/ /* CUDA ADDRESS MODE MIRROR */ /****************************/ __global__ void Test_texture_mirror(const int M) { printf("Texture mirror - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_mirror, -(float)threadIdx.x/(float)M)); printf("Texture mirror - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_mirror, (float)(M + threadIdx.x)/(float)M)); } /********/ /* MAIN */ /********/ void main(){ const int M = 4; // --- Host side memory allocation and initialization float *h_data = (float*)malloc(M * sizeof(float)); for (int i=0; i<M; i++) h_data[i] = (float)i; // --- Texture clamping cudaArray* d_data_clamping = NULL; gpuErrchk(cudaMallocArray(&d_data_clamping, &texture_clamp.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_clamping, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_clamp, d_data_clamping); texture_clamp.normalized = false; texture_clamp.addressMode[0] = cudaAddressModeClamp; dim3 dimBlock(2 * M, 1); dim3 dimGrid(1, 1); Test_texture_clamping<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture border cudaArray* d_data_border = NULL; gpuErrchk(cudaMallocArray(&d_data_border, &texture_border.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_border, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_border, d_data_border); texture_border.normalized = false; texture_border.addressMode[0] = cudaAddressModeBorder; Test_texture_border<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture wrap cudaArray* d_data_wrap = NULL; gpuErrchk(cudaMallocArray(&d_data_wrap, &texture_wrap.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_wrap, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_wrap, d_data_wrap); texture_wrap.normalized = true; texture_wrap.addressMode[0] = cudaAddressModeWrap; Test_texture_wrap<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture mirror cudaArray* d_data_mirror = NULL; gpuErrchk(cudaMallocArray(&d_data_mirror, &texture_mirror.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_mirror, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_mirror, d_data_mirror); texture_mirror.normalized = true ; texture_mirror.addressMode[0] = cudaAddressModeMirror; Test_texture_mirror<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); }

这些是输出

index -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 clamp 0 0 0 0 0 0 0 0 1 2 3 3 3 3 3 3 3 3 3 border 0 0 0 0 0 0 0 0 1 2 3 0 0 0 0 0 0 0 0 wrap 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 mirror 1 2 3 3 2 1 0 0 1 2 3 3 2 1 0 0 1 2 3

As mentioned by sgarizvi, CUDA supports only four, non-customizable address modes, namely, clamp, border, wrap and mirror, which are described in Section 3.2.11.1. of the CUDA programming guide.

The former two work in both unnormalized and normalized coordinates, while the latter two in normalized coordinates only.

To describe the first two, let us consider the unnormalized coordinates case and consider 1D signals, for the sake of simplicity. In this case, the input sequence is c[k], with k=0,...,M-1.

cudaAddressModeClamp

The signal c[k] is continued outside k=0,...,M-1 so that c[k] = c[0] for k < 0, and c[k] = c[M-1] for k >= M.

cudaAddressModeBorder

The signal c[k] is continued outside k=0,...,M-1 so that c[k] = 0 for k < 0and for k >= M.

Now, to describe the last two address modes, we are forced to consider normalized coordinates, so that the 1D input signal samples are assumed to be c[k / M], with k=0,...,M-1.

cudaAddressModeWrap

The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to M. In other words, c[(k + p * M) / M] = c[k / M] for any (positive, negative or vanishing) integer p.

cudaAddressModeMirror

The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to 2 * M - 2. In other words, c[l / M] = c[k / M] for any l and k such that (l + k)mod(2 * M - 2) = 0.

The following code illustrates all the four available address modes

#include <stdio.h> texture<float, 1, cudaReadModeElementType> texture_clamp; texture<float, 1, cudaReadModeElementType> texture_border; texture<float, 1, cudaReadModeElementType> texture_wrap; texture<float, 1, cudaReadModeElementType> texture_mirror; /********************/ /* CUDA ERROR CHECK */ /********************/ #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) { if (code != cudaSuccess) { fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); if (abort) exit(code); } } /******************************/ /* CUDA ADDRESS MODE CLAMPING */ /******************************/ __global__ void Test_texture_clamping(const int M) { printf("Texture clamping - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_clamp, -(float)threadIdx.x)); printf("Texture clamping - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_clamp, (float)(M + threadIdx.x))); } /****************************/ /* CUDA ADDRESS MODE BORDER */ /****************************/ __global__ void Test_texture_border(const int M) { printf("Texture border - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_border, -(float)threadIdx.x)); printf("Texture border - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_border, (float)(M + threadIdx.x))); } /**************************/ /* CUDA ADDRESS MODE WRAP */ /**************************/ __global__ void Test_texture_wrap(const int M) { printf("Texture wrap - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_wrap, -(float)threadIdx.x/(float)M)); printf("Texture wrap - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_wrap, (float)(M + threadIdx.x)/(float)M)); } /****************************/ /* CUDA ADDRESS MODE MIRROR */ /****************************/ __global__ void Test_texture_mirror(const int M) { printf("Texture mirror - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_mirror, -(float)threadIdx.x/(float)M)); printf("Texture mirror - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_mirror, (float)(M + threadIdx.x)/(float)M)); } /********/ /* MAIN */ /********/ void main(){ const int M = 4; // --- Host side memory allocation and initialization float *h_data = (float*)malloc(M * sizeof(float)); for (int i=0; i<M; i++) h_data[i] = (float)i; // --- Texture clamping cudaArray* d_data_clamping = NULL; gpuErrchk(cudaMallocArray(&d_data_clamping, &texture_clamp.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_clamping, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_clamp, d_data_clamping); texture_clamp.normalized = false; texture_clamp.addressMode[0] = cudaAddressModeClamp; dim3 dimBlock(2 * M, 1); dim3 dimGrid(1, 1); Test_texture_clamping<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture border cudaArray* d_data_border = NULL; gpuErrchk(cudaMallocArray(&d_data_border, &texture_border.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_border, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_border, d_data_border); texture_border.normalized = false; texture_border.addressMode[0] = cudaAddressModeBorder; Test_texture_border<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture wrap cudaArray* d_data_wrap = NULL; gpuErrchk(cudaMallocArray(&d_data_wrap, &texture_wrap.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_wrap, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_wrap, d_data_wrap); texture_wrap.normalized = true; texture_wrap.addressMode[0] = cudaAddressModeWrap; Test_texture_wrap<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); // --- Texture mirror cudaArray* d_data_mirror = NULL; gpuErrchk(cudaMallocArray(&d_data_mirror, &texture_mirror.channelDesc, M, 1)); gpuErrchk(cudaMemcpyToArray(d_data_mirror, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); cudaBindTextureToArray(texture_mirror, d_data_mirror); texture_mirror.normalized = true ; texture_mirror.addressMode[0] = cudaAddressModeMirror; Test_texture_mirror<<<dimGrid,dimBlock>>>(M); printf("\n\n\n"); }

Those are the outputs

index -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 clamp 0 0 0 0 0 0 0 0 1 2 3 3 3 3 3 3 3 3 3 border 0 0 0 0 0 0 0 0 1 2 3 0 0 0 0 0 0 0 0 wrap 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 mirror 1 2 3 3 2 1 0 0 1 2 3 3 2 1 0 0 1 2 3

更多推荐

本文发布于:2023-08-07 19:37:00,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1465579.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:纹理   模式   CUDA   textures   modes

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!