1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
|
#include <cstdio>
#define N 20
// Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, N).
//
// Launch layout: one-dimensional grid of one-dimensional blocks. Each thread
// handles at most one element, so gridDim.x * blockDim.x must be >= N for the
// whole output to be written. Threads whose index falls past N do nothing.
// Uses no shared memory.
//
// a, b : device pointers to at least N readable ints
// c    : device pointer to at least N writable ints
__global__ void add(int *a, int *b, int *c) {
    // Flat global index; for a <<<N, 1>>> launch this reduces to blockIdx.x.
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid < N) {
        c[tid] = a[tid] + b[tid];
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise; `what` names the
// call that produced the status so the message identifies the failing step.
static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Fills two host arrays with 0..N-1, adds them on the device with the `add`
// kernel (one block per element), and prints each sum on its own line.
// Returns 0 on success, 1 if any CUDA call or the kernel launch fails.
int main() {
    int a[N];
    int b[N];
    int c[N];
    // Start as null so the unconditional cudaFree calls below are safe even
    // when an earlier allocation failed.
    int *dev_a = nullptr;
    int *dev_b = nullptr;
    int *dev_c = nullptr;
    const size_t bytes = N * sizeof(int);

    for (int i = 0; i < N; i++) {
        a[i] = i;
        b[i] = i;
    }

    // Short-circuit evaluation stops at the first failing step.
    bool ok = cudaOk(cudaMalloc(&dev_a, bytes), "cudaMalloc(dev_a)") &&
              cudaOk(cudaMalloc(&dev_b, bytes), "cudaMalloc(dev_b)") &&
              cudaOk(cudaMalloc(&dev_c, bytes), "cudaMalloc(dev_c)");

    ok = ok &&
         cudaOk(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(a -> dev_a)") &&
         cudaOk(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(b -> dev_b)");

    if (ok) {
        add<<<N, 1>>>(dev_a, dev_b, dev_c);
        // Kernel launches return nothing; launch-configuration errors are
        // only visible through cudaGetLastError().
        ok = cudaOk(cudaGetLastError(), "add kernel launch");
    }

    // The blocking copy back to the host also reports errors raised while the
    // kernel was executing.
    ok = ok && cudaOk(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(dev_c -> c)");

    // Release device memory on both the success and the failure path.
    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);

    if (!ok) {
        return 1;
    }

    // printf comes from <cstdio>, which the file already includes.
    for (int i = 0; i < N; i++) {
        printf("%d\n", c[i]);
    }
    return 0;
}
|
近期评论