Write a basic "Hello World" program in CUDA, then compile and run it on cheetah.
Solution
Step 1:
Copy the code below into a file (for example helloworld.cu) in your "/cfs/zorn/nobackup/u/username" directory.
The code below takes the string "Hello", prints it, then passes it to CUDA together with an array of offsets. The offsets are added to the characters in parallel to produce the string "World!".
#include <stdio.h>
#include <stdlib.h>
// Size of both the character buffer and the offset array
// (six visible characters plus the terminating NUL).
const int N = 7;
// Threads per block for the launch: one thread per array element.
const int blocksize = N;
// Kernel: adds the offset b[i] to the character a[i], where i is the thread's
// x index within its block.
//
// a - character buffer, modified in place; must be addressable from the device
// b - per-character offsets; must be addressable from the device
//
// Only threadIdx.x is used for indexing, so the kernel is meant to be launched
// with a single one-dimensional block. Threads whose index is N or larger do
// nothing, which keeps an oversized block from touching memory past the arrays.
__global__
void hello(char *a, int *b)
{
    const int i = threadIdx.x;
    if (i < N) {
        a[i] += b[i];
    }
}
// Prints "Hello ", lets the GPU add the offsets in b to the characters of a,
// then prints the transformed string ("World!") followed by a newline.
// Returns EXIT_SUCCESS on success, EXIT_FAILURE if any CUDA call fails.
int main()
{
    // The trailing space is required: it is the sixth character, which the
    // offset +1 turns into '!'. The seventh element is the terminating NUL,
    // left unchanged by the offset 0.
    char a[N] = "Hello ";
    int b[N] = {15, 10, 6, 0, -11, 1, 0};

    // Device-side copies of a and b. Initialised to NULL so that the cleanup
    // below is safe even if an allocation fails (cudaFree(NULL) is a no-op).
    char *ad = NULL;
    int *bd = NULL;
    const int csize = N * sizeof(char);
    const int isize = N * sizeof(int);

    printf("%s", a);

    // Each step runs only if every previous step succeeded.
    cudaError_t err = cudaMalloc((void**)&ad, csize);
    if (err == cudaSuccess) {
        err = cudaMalloc((void**)&bd, isize);
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(ad, a, csize, cudaMemcpyHostToDevice);
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(bd, b, isize, cudaMemcpyHostToDevice);
    }

    if (err == cudaSuccess) {
        // One block of blocksize threads: one thread per character.
        dim3 dimBlock(blocksize, 1);
        dim3 dimGrid(1, 1);
        hello<<<dimGrid, dimBlock>>>(ad, bd);
        // A kernel launch returns nothing; launch errors are reported here.
        err = cudaGetLastError();
    }
    if (err == cudaSuccess) {
        // Blocking copy: also waits for the kernel to finish.
        err = cudaMemcpy(a, ad, csize, cudaMemcpyDeviceToHost);
    }

    cudaFree(ad);
    cudaFree(bd);

    if (err != cudaSuccess) {
        fprintf(stderr, "\nCUDA error: %s\n", cudaGetErrorString(err));
        return EXIT_FAILURE;
    }

    printf("%s\n", a);
    return EXIT_SUCCESS;
}
Step 2:
Compile the above code. If the file is named helloworld.cu, execute the following commands:
cd /cfs/zorn/nobackup/u/username
nvcc helloworld.cu -o helloworld.out
This will result in a binary called helloworld.out.
Step 3:
To execute this binary on an interactive node, execute the following
qsub -l nodes=1,walltime=00:10:00 -I
7025.gc.pdc.kth.se
cd /cfs/zorn/nobackup/u/username
./helloworld.out
Hello World!

