问题描述
- 用 cuBLAS 库求矩阵范数，求各位大神看看代码哪里不对。
-
void *device_numofduanyuan;
cublasHandle_t handler;
cudaEvent_t cublas_start,cublas_stop,cula_start,cula_stop;
clock_t begin,end;begin=clock(); cudaEventCreate(&cublas_start); cudaEventCreate(&cublas_stop); cudaEventCreate(&cula_start); cudaEventCreate(&cula_stop); //cublasCreate(&handle); cudaMalloc((void **)&device_numofduanyuan,sizeof(int)); cudaMemset(device_numofduanyuan,0,sizeof(int)); if (type==3||type==0||type==1||type==2) { //float *matVt,*matMt,*matOMEGA; //matVt=(float *)malloc(sizeof(float)*bands*width); //matVt 所有波段的一行数据; //matMt=(float *)malloc(sizeof(float)*height*width); //matOMEGA=(float *)malloc(sizeof(float)*bands*numofduanyuan); matrix_f matVt,matMt,matOMEGA; Init_fmatrix(matVt,bands,width); Init_fmatrix(matMt,height,width); Init_fmatrix(matOMEGA,bands,numofduanyuan); float *temp_data=(float *)host_data; float *dev_matVt,*dev_matMt,*dev_matOMEGA; cudaMalloc((void**)&dev_matVt,sizeof(float)*bands); cudaMalloc((void**)&dev_matMt,sizeof(float)*height*width); cudaMalloc((void**)&dev_matOMEGA,sizeof(float)*bands*numofduanyuan);
#pragma unroll
for (int i=0;i<height;i++)
{
for (int j=0;j<bands;j++)
{
for (int k=0;k<width;k++)
{
int pos=i*width+k+j*(width*height);
//float temp=host_data[pos];
matVt.mat[j][k]=temp_data[pos];
}
}
cudaMemcpy(dev_matVt,matVt.mat,sizeof(float)*width*bands,cudaMemcpyHostToDevice);
cudaMemcpy(dev_matMt,matMt.mat,sizeof(float)*height*width,cudaMemcpyHostToDevice);
cublasSnrm2(handler,bands,dev_matVt,sizeof(float),dev_matMt);
解决方案
时间: 2024-12-30 20:27:44