OpenCLの行列和のC++化

OpenCLのライブラリを作成する前段階として、昔書いたOpenCLの行列和のプログラムのC++化を行った。まだ完全ではないがとりあえず動くのであげておく。

[cpp]
/*
* main.h
*
*/

#ifndef MAIN_H_
#define MAIN_H_

#include<iostream>
#include<cstdlib>
#include<string>
#include<cstdio>
#include"clapi.h"

#ifdef __APPLE__
#include<OpenCL/opencl.h>
#else
#include<CL/cl.h>
#endif //__APPLE

// Same value as the MAX_SOURCE_SIZE defined in clapi.h (0x100000 = 1,048,576).
// Nothing in main.cpp as shown references this macro directly.
#define MAX_SOURCE_SIZE (0x100000)

#endif /* MAIN_H_ */
[/cpp]

[cpp]
/*
* main.cpp
*
*/
#include"main.h"

using namespace std;
/**
 * Adds two square matrices on an OpenCL device and prints the result.
 *
 * Input file "inputD.txt": an integer `size`, followed by size*size values of
 * the first matrix and then size*size values of the second matrix.
 *
 * Exit status: 0 on success, EXIT_FAILURE if the input file cannot be opened,
 * 1 on malformed input or allocation failure, -1 if the OpenCL run fails.
 */
int main(){
    FILE *fp = fopen("inputD.txt", "r");
    if (fp == NULL)
    {
        printf("file open error!!\n");
        exit(EXIT_FAILURE);
    }

    int size = 0;
    if (fscanf(fp, "%d", &size) != 1 || size <= 0)
    {
        printf("invalid matrix size in inputD.txt\n");
        fclose(fp);
        return 1;
    }

    // clauto() receives the element count as an int, so size*size must fit
    // in a 32-bit signed integer.
    const int max_count = 0x7fffffff;
    if (size > max_count / size)
    {
        printf("matrix size too large\n");
        fclose(fp);
        return 1;
    }
    const int count = size * size;

    double *mtrx1 = static_cast<double*>(malloc(count * sizeof(double)));
    double *mtrx2 = static_cast<double*>(malloc(count * sizeof(double)));
    if (mtrx1 == NULL || mtrx2 == NULL)
    {
        free(mtrx1);   // free(NULL) is a no-op
        free(mtrx2);
        fclose(fp);
        return 1;
    }

    // The matrices are stored row-major and back to back in the file, so a
    // flat read of `count` values each is equivalent to the nested i/j loops.
    bool read_ok = true;
    for (int k = 0; read_ok && k < count; k++)
    {
        read_ok = (fscanf(fp, "%lf", &mtrx1[k]) == 1);
    }
    for (int k = 0; read_ok && k < count; k++)
    {
        read_ok = (fscanf(fp, "%lf", &mtrx2[k]) == 1);
    }
    fclose(fp);

    if (!read_ok)
    {
        printf("inputD.txt does not contain two %d x %d matrices\n", size, size);
        free(mtrx1);
        free(mtrx2);
        return 1;
    }

    // 1. Select the kernel program.
    std::string filename = "test.cl";
    // 2. Create the wrapper object.
    clapi cl(filename);
    // 3. Run it:
    //    cl.clauto(number of inputs,
    //              element count of input 1, address of input 1,
    //              element count of input 2, address of input 2, ...)
    if (!cl.clauto(2, count, mtrx1, count, mtrx2))
    {
        free(mtrx1);
        free(mtrx2);
        return -1;
    }

    // The result buffer is allocated (malloc) inside clapi; main only takes
    // it over here, so no separate allocation is made beforehand.
    double *Out = cl.getOut();
    if (Out == NULL)
    {
        free(mtrx1);
        free(mtrx2);
        return -1;
    }

    // Print the result using the size actually read from the input file.
    std::cout << "加算結果" << std::endl;
    for (int i = 0; i < size; i++)
    {
        for (int j = 0; j < size; j++)
        {
            std::cout << Out[i * size + j] << " ";
        }
        std::cout << std::endl;
    }

    free(mtrx1);
    free(mtrx2);
    free(Out);
    return 0;
}
[/cpp]

[cpp]
/*
* clapi.h
*
*/

#ifndef CLAPI_H_
#define CLAPI_H_

#ifdef __APPLE__
#include<OpenCL/opencl.h>
#else
#include<CL/cl.h>
#endif //__APPLE

#include<stdarg.h>
#include<cstdio>
#include<cstdlib>
#include<iostream>
#include<string>

// Capacity in bytes (0x100000 = 1 MiB) of the buffer that clapi::doOpenCL
// uses for the kernel source file; at most this many bytes are read from it.
#define MAX_SOURCE_SIZE (0x100000)

using namespace std;

/**
 * Small wrapper that runs one OpenCL kernel over arrays of double.
 *
 * Usage: construct with the kernel source file name, call clauto() with the
 * input arrays, then fetch the result with getOut().
 *
 * The object holds OpenCL handles that the destructor releases, so it must
 * not be copied; the copy operations are declared private and left
 * undefined to turn accidental copies into build errors.
 */
class clapi {
public:
    clapi();
    /** @param filename path of the OpenCL kernel source file. */
    clapi(std::string);
    ~clapi();

    /**
     * Stores the inputs and runs doOpenCL().
     * The first argument is the number of inputs; it is followed by that many
     * (int element_count, double* data) pairs.
     */
    bool clauto(int , ...);

    /** Runs the kernel on the stored inputs; returns false on failure. */
    bool doOpenCL();

    /** Returns the result buffer produced by doOpenCL(). */
    double* getOut();

    cl_device_id device_list[4];
private:
    // Not copyable: a copy would release the same OpenCL handles twice.
    clapi(const clapi&);
    clapi& operator=(const clapi&);

    void builderr();                      // prints the program build log
    cl_int status;                        // status of the most recent OpenCL call
    cl_platform_id platforms[2];
    cl_uint num_platforms;
    cl_context context;
    //cl_device_id device_list[4]; // for some reason a length of 2 does not work
    cl_uint num_device;
    cl_context_properties properties[3];
    cl_command_queue queue;
    cl_program program;
    cl_uint pro_info;
    cl_kernel kernel;
    cl_mem memIn[10];                     // one device buffer per input
    cl_mem memOut;                        // device buffer for the result
    std::string filename;                 // kernel source file
    double* s[10];                        // host pointers of the inputs
    int size[10];                         // element counts of the inputs
    int num_hikisu;                       // number of inputs ("hikisu" = arguments)
    double* Out;                          // host result buffer
};

#endif /* CLAPI_H_ */
[/cpp]

[cpp]
/*
* clapi.cpp
*
*/

#include "clapi.h"

using namespace std;

namespace {

// Value-initialises every element of a fixed-size array (null for handles
// and pointers, 0 for integers).
template <typename T, std::size_t N>
void clear_array(T (&items)[N]) {
    for (std::size_t i = 0; i < N; ++i) {
        items[i] = T();
    }
}

} // namespace

/**
 * Creates a wrapper with no kernel file name.
 *
 * Every handle starts out null so the destructor is safe even when
 * doOpenCL() was never called or stopped part-way through.
 */
clapi::clapi()
    : status(CL_SUCCESS), num_platforms(0), context(NULL), num_device(0),
      queue(NULL), program(NULL), pro_info(0), kernel(NULL), memOut(NULL),
      num_hikisu(0), Out(NULL) {
    clear_array(device_list);
    clear_array(platforms);
    clear_array(properties);
    clear_array(memIn);
    clear_array(s);
    clear_array(size);
}

/**
 * Creates a wrapper for the kernel source file `tmp`.
 * All handles start out null, as in the default constructor.
 */
clapi::clapi(string tmp)
    : status(CL_SUCCESS), num_platforms(0), context(NULL), num_device(0),
      queue(NULL), program(NULL), pro_info(0), kernel(NULL), memOut(NULL),
      filename(tmp), num_hikisu(0), Out(NULL) {
    clear_array(device_list);
    clear_array(platforms);
    clear_array(properties);
    clear_array(memIn);
    clear_array(s);
    clear_array(size);
}

/**
 * Releases every OpenCL object that was actually created.
 *
 * The host result buffer `Out` is deliberately not freed here: it is handed
 * to the caller through getOut().
 */
clapi::~clapi(){
    if (memOut != NULL) clReleaseMemObject(memOut);
    // Walk all slots rather than num_hikisu so a partially completed run
    // (some buffers created, some not) is cleaned up correctly.
    for (std::size_t i = 0; i < sizeof(memIn) / sizeof(memIn[0]); ++i) {
        if (memIn[i] != NULL) clReleaseMemObject(memIn[i]);
    }
    if (kernel != NULL) clReleaseKernel(kernel);
    if (program != NULL) clReleaseProgram(program);
    if (queue != NULL) clReleaseCommandQueue(queue);
    if (context != NULL) clReleaseContext(context);
}

/**
 * Records the input arrays and runs the kernel.
 *
 * @param n    number of inputs that follow; must be between 1 and the
 *             capacity of the internal tables.
 * @param ...  n pairs of (int element_count, double* data).
 * @return     false if n is out of range or doOpenCL() fails, true otherwise.
 */
bool clapi::clauto(int n, ...) {
    const int max_inputs = static_cast<int>(sizeof(s) / sizeof(s[0]));
    if (n < 1 || n > max_inputs) {
        std::cout << "clauto: number of inputs must be between 1 and "
                  << max_inputs << std::endl;
        return false;
    }
    num_hikisu = n;

    va_list args;
    va_start(args, n);
    for (int t = 0; t < n; t++) {
        size[t] = va_arg(args, int);      // element count of this input
        s[t] = va_arg(args, double*);     // host pointer of this input
    }
    va_end(args);

    // Propagate the outcome instead of reporting success unconditionally.
    return doOpenCL();
}

bool clapi::doOpenCL() {
status = clGetPlatformIDs(2, platforms, &num_platforms);
if (status != CL_SUCCESS || num_platforms <= 0) {
fprintf(stdout, "clGetPlatformIDs failed.n");
printf("%dn", status);
return false;
}
// 最初の要素として返されたプラットフォームIDを、プロパティにセットする
properties[0] = CL_CONTEXT_PLATFORM;
properties[1] = (cl_context_properties)platforms[1];
properties[2] = 0;

//1.デバイスの取得
status = clGetDeviceIDs(platforms[1], CL_DEVICE_TYPE_GPU, 4, &device_list[0], &num_device);
if (status != CL_SUCCESS || num_device <= 0) {
fprintf(stdout, "clGetDeviceIDs failed.n");
printf("%dn", status);
return false;
}

context = clCreateContext(properties, num_device, &device_list[0], NULL,NULL, &status);
if (status != CL_SUCCESS) {
cout << "clCreateContext failednError Code: " << status << endl;
return false;
}

//3.コマンドキューの作成
queue = clCreateCommandQueue(context, device_list[0], 0, &status);
if (status != CL_SUCCESS) {
cout << "clCreateCommandQueue failednError Code: " << status << endl;
return false;
}
//4.プログラムオブジェクトの作成
FILE *fp;
size_t source_size;
char *source_str;

fp = fopen(filename.c_str(), "r");
if (!fp) {
fprintf(stderr, "Failed to leas kernel.n");
return false;
}
source_str = (char *) malloc(MAX_SOURCE_SIZE);
source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);

program = clCreateProgramWithSource(context, 1, (const char**) &(source_str), &source_size, &status);
if (status != CL_SUCCESS) {
cout << "clCreateProgramWithSource failednError Code: " << status << endl;
return false;
}

//5.プログラムのビルド
status = clBuildProgram(program, num_device, &device_list[0], NULL, NULL, NULL);
if (status != CL_SUCCESS) {
cout << "clBuildProgram failed nError Code: "<< status << endl;
builderr();
return false;
}

//6.カーネルの作成
kernel = clCreateKernel(program, "calc", &status);
if (status != CL_SUCCESS) {
cout << "clCreateKernel failednError Code: " << status << endl;
return false;
}

//7メモリオブジェクトの作成
for(int i = 0 ; i<num_hikisu ; i++)
{
memIn[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(double)*size[i], (void*) s[i], &status);
if (status != CL_SUCCESS) {
cout << "clCreateBuffer failednError Code: " << status << endl;
return false;
}

}
memOut = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(double)*size[1] , NULL, &status);
if (status != CL_SUCCESS) {
cout << "clCreateBuffer failednError Code: " << status << endl;
return false;
}

//8.カーネルに渡す引数の設定
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &memOut);
if (status != CL_SUCCESS) {
cout << "clSetKernelArg failednError Code: " << status << endl;
return false;
}

for(int i = 0; i< num_hikisu; i++)
{
status = clSetKernelArg(kernel,i,sizeof(cl_mem),(void *) &memIn[i]);
if (status != CL_SUCCESS) {
cout << "clSetKernelArg failednError Code: " << status << endl;
return false;
}
}

//9.カーネルの実行
size_t globalsize[] = { size[0] };
status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalsize, NULL, 0, NULL, NULL);
if (status != CL_SUCCESS) {
cout << "clEnqueueNDRangeKernel failednError Code: " << status << endl;
return false;
}

//10.結果の取得
Out = (double*) malloc(size[0] * sizeof(double));
status = clEnqueueReadBuffer(queue, memOut, CL_TRUE, 0, sizeof(double)*size[1], Out, 0, NULL, NULL);
if (status != CL_SUCCESS) {
cout << "clEnqueueReadBuffer failednError Code: " << status << endl;
return false;
}

return true;
}

double* clapi::getOut(){
return Out;
}

/**
 * Prints the build log of `program` for the first device in device_list.
 *
 * The log is queried in two steps: first its size, then its contents.
 * `status` is overwritten with the result of those queries.
 */
void clapi::builderr() {
    size_t logsize = 0;
    status = clGetProgramBuildInfo(program, device_list[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &logsize);
    if (status != CL_SUCCESS) {
        std::cout << "clGetProgramBuildInfo failed" << std::endl;
        return;
    }

    // std::string owns the buffer, so there is nothing to release by hand and
    // no dead NULL check (operator new[] throws instead of returning NULL).
    std::string logbuffer(logsize, '\0');
    if (logsize > 0) {
        status = clGetProgramBuildInfo(program, device_list[0], CL_PROGRAM_BUILD_LOG, logsize, &logbuffer[0], NULL);
        if (status != CL_SUCCESS) {
            std::cout << "clGetProgramBuildInfo failed\nError Code: " << status << std::endl;
            return;
        }
    }

    std::cout << "build log" << std::endl;
    // c_str() stops at the first terminator, so the trailing NUL that is part
    // of the reported log size is not written to the stream.
    std::cout << logbuffer.c_str() << std::endl;
}
[/cpp]

[cpp]
/*
* test.cl
*
*/

#pragma OPENCL EXTENSION cl_khr_fp64: enable

/*
 * Element-wise addition of two double arrays.
 * Each work-item handles the element at its global id in dimension 0:
 * out[id] = in1[id] + in2[id].
 */
__kernel void calc(__global const double *in1,
                   __global const double *in2,
                   __global double *out)
{
    const int gid = get_global_id(0);
    const double lhs = in1[gid];
    const double rhs = in2[gid];
    out[gid] = lhs + rhs;
}
[/cpp]

コメントを残す

メールアドレスが公開されることはありません。 * が付いている欄は必須項目です