Cuda实战

编程入门 行业动态 更新时间:2024-10-08 18:33:40

Cuda<a href="https://www.elefans.com/category/jswz/34/1769775.html">实战</a>

Cuda实战

说明

本样例为每个CPU核心启动一个主机线程,每个线程各执行一次核函数;这些线程按“线程编号对GPU数量取模”的方式轮流分配到n个GPU上运行,整体使用pthread多线程技术实现。

代码


#include <cuda_runtime.h>
#include <pthread.h> //多线程
#include <unistd.h>
#include <stdio.h>  // stdio functions are used since C++ streams aren't necessarily thread safe
#include <pthread.h>
#define address unsigned long long int  // integer type used to carry pointer values, so pointers and integers can be converted back and forth
int* finish_code;   // per-thread completion flags: an entry of 0 means that thread has finished executing
// Get the number of CPUs (for use on Linux)
/**
 * Returns the number of processors configured on this Linux host.
 *
 * The value comes from sysconf(_SC_NPROCESSORS_CONF), declared in <unistd.h>.
 * sysconf reports failure as -1; because the rest of this file divides work by
 * this count, any value below 1 is reported as 1.
 *
 * @return processor count, always >= 1.
 */
int Get_CPU_Number_Linux() {
    const long configured = sysconf(_SC_NPROCESSORS_CONF);
    if (configured < 1) {
        return 1;
    }
    return (int)configured;
}
// Get the number of GPUs (independent of the operating system)
/**
 * Returns the number of CUDA devices visible to this process.
 *
 * Uses cudaGetDeviceCount from <cuda_runtime.h>. If the query itself fails,
 * the function reports 0 devices instead of returning an unverified count.
 *
 * @return device count, or 0 when the query fails or no device is present.
 */
int Get_GPU_Number() {
    int num = 0;
    const cudaError_t status = cudaGetDeviceCount(&num);
    if (status != cudaSuccess || num < 0) {
        return 0;
    }
    return num;
}
// Kernel: adds a fixed value to every element of the array
/**
 * Adds the constant b to one element of g_a per thread.
 *
 * Each thread updates the element at its flat 1D index,
 * blockIdx.x * blockDim.x + threadIdx.x. The kernel performs no bounds check,
 * so the launch must not create more threads than g_a has elements.
 *
 * @param g_a device array updated in place.
 * @param b   value added to each element.
 */
__global__ void kernelAddConstant(int *g_a, int b) {
    const unsigned int first_in_block = blockIdx.x * blockDim.x;
    const int element = first_in_block + threadIdx.x;
    g_a[element] = g_a[element] + b;
}
// Thread entry function
// Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess.
static bool cudaCallSucceeded(cudaError_t status, const char *what, int cpu_thread_id) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CPU thread %d: %s failed: %s\n", cpu_thread_id, what,
            cudaGetErrorString(status));
    return false;
}

/**
 * pthread entry point: adds b to this CPU thread's slice of the host array
 * using one GPU.
 *
 * args points to five `address` values, each holding a pointer as an integer:
 *   addr[0] -> int  cpu_thread_id   index of this CPU thread
 *   addr[1] -> int  n               total number of array elements
 *   addr[2] -> int  nbytes          total array size in bytes (accepted for
 *                                   compatibility; sizes are derived from n)
 *   addr[3] -> int  b               increment applied to every element
 *   addr[4] =  int* data            the host array itself
 *
 * The array is split across Get_CPU_Number_Linux() threads. Thread i owns the
 * elements [i*n/T, (i+1)*n/T), so every element is covered exactly once even
 * when T does not divide n. The caller is expected to start exactly that many
 * threads, with ids 0..T-1.
 *
 * kernelAddConstant has no bounds check, so the device buffer is padded up to
 * a whole number of 128-thread blocks; only the real elements are copied in
 * and out.
 *
 * finish_code[cpu_thread_id] is set to 0 on every exit path, so a waiter
 * polling that flag cannot hang. CUDA failures are reported on stderr and
 * leave the slice unmodified or partially modified.
 *
 * @return always NULL.
 */
void* exec(void* args) {
    address* addr = (address*)args;
    const int cpu_thread_id = ((int*)(addr[0]))[0];
    const int num_cpu_threads = Get_CPU_Number_Linux();
    const int n = ((int*)(addr[1]))[0];
    const int b = ((int*)(addr[3]))[0];
    int* data = (int*)(addr[4]);
    const int num_gpus = Get_GPU_Number();

    // Both counts are used as divisors below, so reject non-positive values up front.
    bool ok = true;
    if (num_cpu_threads < 1 || num_gpus < 1) {
        fprintf(stderr, "CPU thread %d: invalid configuration (%d CPU threads, %d GPUs)\n",
                cpu_thread_id, num_cpu_threads, num_gpus);
        ok = false;
    }

    if (ok) {
        // "% num_gpus" allows more CPU threads than GPU devices.
        int gpu_id = -1;
        ok = cudaCallSucceeded(cudaSetDevice(cpu_thread_id % num_gpus), "cudaSetDevice",
                               cpu_thread_id) &&
             cudaCallSucceeded(cudaGetDevice(&gpu_id), "cudaGetDevice", cpu_thread_id);
        if (ok) {
            printf("CPU thread %d (of %d) uses CUDA device %d\n", cpu_thread_id,
                   num_cpu_threads, gpu_id);
        }
    }

    if (ok) {
        // 64-bit arithmetic keeps cpu_thread_id * n from overflowing int.
        const long long first = (long long)cpu_thread_id * n / num_cpu_threads;
        const long long last = (long long)(cpu_thread_id + 1) * n / num_cpu_threads;
        const size_t count = (size_t)(last - first);

        // A slice can be empty when there are more CPU threads than elements;
        // launching zero blocks is an invalid configuration, so skip the GPU work.
        if (count > 0) {
            int *sub_a = data + first;  // this CPU thread's portion of data
            const unsigned int threads_per_block = 128;
            const size_t block_count = (count + threads_per_block - 1) / threads_per_block;
            const size_t chunk_bytes = count * sizeof(int);
            const size_t padded_bytes = block_count * threads_per_block * sizeof(int);

            dim3 gpu_threads(threads_per_block);
            dim3 gpu_blocks((unsigned int)block_count);

            int *d_a = NULL;  // device memory associated with this CPU thread
            ok = cudaCallSucceeded(cudaMalloc((void **)&d_a, padded_bytes), "cudaMalloc",
                                   cpu_thread_id) &&
                 cudaCallSucceeded(cudaMemset(d_a, 0, padded_bytes), "cudaMemset",
                                   cpu_thread_id) &&
                 cudaCallSucceeded(cudaMemcpy(d_a, sub_a, chunk_bytes, cudaMemcpyHostToDevice),
                                   "cudaMemcpy (host to device)", cpu_thread_id);
            if (ok) {
                kernelAddConstant<<<gpu_blocks, gpu_threads>>>(d_a, b);
                // The launch itself returns nothing; cudaGetLastError exposes a bad
                // configuration, and the blocking copy back surfaces execution errors.
                ok = cudaCallSucceeded(cudaGetLastError(), "kernelAddConstant launch",
                                       cpu_thread_id) &&
                     cudaCallSucceeded(
                         cudaMemcpy(sub_a, d_a, chunk_bytes, cudaMemcpyDeviceToHost),
                         "cudaMemcpy (device to host)", cpu_thread_id);
            }
            if (d_a != NULL) {
                cudaCallSucceeded(cudaFree(d_a), "cudaFree", cpu_thread_id);
            }
        }
    }

    finish_code[cpu_thread_id] = 0;
    printf("thread %d finish!\n", cpu_thread_id);
    return NULL;
}
// Check whether every array value equals index + b
/**
 * Verifies that data[i] == i + b for every i in [0, n).
 *
 * @return 1 when every element matches, 0 at the first mismatch.
 */
int correctResult(int *data, const int n, const int b) {
    for (int i = 0; i < n; i++) {
        if (data[i] != i + b) {
            return 0;
        }
    }
    return 1;
}

/**
 * Program entry point.
 *
 * Fills a host array with data[i] = i, starts one worker thread per CPU
 * (each running exec on its slice of the array), waits for all of them,
 * and checks that every element was increased by b.
 *
 * All workers are created before any is joined, so they run concurrently.
 *
 * @return 0 when the result is correct; 1 when no GPU is found, an allocation
 *         fails, a thread cannot be created, or the result check fails.
 */
int main() {
    // Number of CPUs and GPUs available.
    int num_cpus = Get_CPU_Number_Linux();
    int num_gpus = Get_GPU_Number();

    // Nothing to do without a GPU.
    if (num_gpus < 1) {
        printf("no CUDA capable devices were detected!\n");
        return 1;
    }

    printf("number of host CPUs:\t%d\n", num_cpus);
    printf("number of device GPUs:\t%d\n", num_gpus);

    // Print the name of every GPU.
    for (int i = 0; i < num_gpus; i++) {
        cudaDeviceProp dprop;
        if (cudaGetDeviceProperties(&dprop, i) == cudaSuccess) {
            printf("   %d: %s\n", i, dprop.name);
        } else {
            printf("   %d: <properties unavailable>\n", i);
        }
    }
    printf("---------------------------\n");

    // Initialise the host data: data[i] = i.
    int n = num_gpus * 8192;
    int nbytes = n * sizeof(int);
    int b = 3;  // value added to every element
    int *data = (int *)malloc(nbytes);
    if (0 == data) {
        printf("couldn't allocate CPU memory\n");
        return 1;
    }
    for (int i = 0; i < n; i++) data[i] = i;
    printf("---------------------------\n");

    int thread_num = num_cpus;
    if (thread_num < 1) {
        printf("no host CPUs were detected!\n");
        free(data);
        return 1;
    }

    int* idx = (int*) malloc(thread_num * sizeof(int));
    finish_code = (int*) malloc(thread_num * sizeof(int));
    pthread_t* threads = (pthread_t*) malloc(thread_num * sizeof(pthread_t));
    // One allocation holds the 5-entry argument table of every thread.
    address* addr = (address*) malloc((size_t)thread_num * 5 * sizeof(address));
    if (idx == NULL || finish_code == NULL || threads == NULL || addr == NULL) {
        printf("couldn't allocate CPU memory\n");
        free(addr);
        free(threads);
        free(finish_code);
        finish_code = NULL;
        free(idx);
        free(data);
        return 1;
    }

    // Start every worker first ...
    int started = 0;
    for (int i = 0; i < thread_num; i++) {
        idx[i] = i;
        finish_code[i] = 1;
        address* thread_args = addr + 5 * i;
        thread_args[0] = (address)(&(idx[i]));
        thread_args[1] = (address)(&(n));
        thread_args[2] = (address)(&(nbytes));
        thread_args[3] = (address)(&(b));
        thread_args[4] = (address)(data);
        if (pthread_create(&threads[i], NULL, exec, thread_args) != 0) {
            printf("couldn't create CPU thread %d\n", i);
            break;
        }
        started++;
    }

    // ... then wait for all of them. pthread_join already blocks until each
    // worker has returned, so no polling of finish_code is needed here.
    for (int i = 0; i < started; i++) {
        pthread_join(threads[i], NULL);
    }

    // The result can only be complete when every worker actually ran.
    bool bResult = (started == thread_num) && correctResult(data, n, b);

    // Release host resources.
    free(addr);
    free(threads);
    free(finish_code);
    finish_code = NULL;
    free(idx);
    free(data);

    if (bResult) printf("SUCCESS!\n");
    if (!bResult) printf("FAIL!\n");
    return bResult ? 0 : 1;
}

更多推荐

Cuda实战

本文发布于:2024-03-12 19:41:43,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1732265.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:实战   Cuda

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!