
|
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/types.h>
#include <OpenCL/opencl.h>
#include <time.h>
#include "SOIL.h"
////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Main Code//////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/*
 * Loads "Test3.bmp" through SOIL as one byte per pixel, then runs the
 * "Image_Processing" kernel from "Image_Process.cl" twice: first on the
 * OpenCL CPU device (gpu == 0), then on the OpenCL GPU device (gpu == 1).
 * Each result is written to a BMP file and the elapsed times of both runs
 * are printed at the end.
 *
 * Timings are taken with clock(), i.e. processor time used by this process;
 * NOTE(review): for a device that runs asynchronously this may not reflect
 * wall-clock time -- confirm this is the intended measurement.
 *
 * Returns 0 when both runs succeed, EXIT_FAILURE as soon as any step fails
 * (after releasing whatever had been acquired so far).
 */
int main (int argc, const char * argv[])
{
    // Defined after main in this file; declared here so the call below has a prototype.
    char * LoadFile2txt(const char *File);

    const char *cSourceFile = "Image_Process.cl";
    char filename[] = "Test3.bmp";
    const size_t LocalWorkSize = 256;   // host buffers are padded to a multiple of this
    int status = EXIT_SUCCESS;
    int gpu;
    int width = 0;
    int height = 0;
    int channels = 0;
    int TimeTotGPU = 0;
    int TimeKernGPU = 0;
    int TimeTotCPU = 0;
    int TimeKernCPU = 0;
    unsigned char *monImage;
    unsigned char *imageTraitee;
    unsigned char *padded;
    size_t nbr_val_image;
    size_t GlobalWorkzise;

    (void)argc;
    (void)argv;

    monImage = SOIL_load_image(filename, &width, &height, &channels, SOIL_LOAD_L);
    if (monImage == NULL || width <= 0 || height <= 0)
    {
        printf("Error: Failed to load image %s!\n", filename);
        free(monImage);
        return EXIT_FAILURE;
    }

    // The image was requested with SOIL_LOAD_L and is handled as one value per pixel from here on.
    channels = 1;
    nbr_val_image = (size_t)width * (size_t)height * (size_t)channels;
    printf("Image width: %d \n", width);
    printf("Image height: %d \n", height);
    printf("Image channels: %d \n", channels);
    printf("nbr_val_image de: %zu \n", nbr_val_image);

    // Round the element count up to the next multiple of LocalWorkSize so the
    // host and device buffers cover the whole global work size.
    GlobalWorkzise = ((nbr_val_image + LocalWorkSize - 1) / LocalWorkSize) * LocalWorkSize;

    // Grow through a temporary so the original buffer is not lost if realloc fails.
    padded = realloc(monImage, sizeof(cl_uchar) * GlobalWorkzise);
    if (padded == NULL)
    {
        printf("Error: Failed to allocate host memory!\n");
        free(monImage);
        return EXIT_FAILURE;
    }
    monImage = padded;
    // Give the padding a defined value before it is uploaded to the device.
    memset(monImage + nbr_val_image, 0, GlobalWorkzise - nbr_val_image);

    imageTraitee = calloc(GlobalWorkzise, sizeof(cl_uchar));
    if (imageTraitee == NULL)
    {
        printf("Error: Failed to allocate host memory!\n");
        free(monImage);
        return EXIT_FAILURE;
    }

    // Printed after padding: the buffer now holds at least LocalWorkSize bytes,
    // so reading the first 12 values is in bounds even for a tiny image.
    printf("Vals pix monImage:\n%d %d %d\n%d %d %d\n%d %d %d\n%d %d %d\n\n",
           monImage[0], monImage[1], monImage[2], monImage[3], monImage[4], monImage[5],
           monImage[6], monImage[7], monImage[8], monImage[9], monImage[10], monImage[11]);

    for (gpu = 0; gpu < 2; gpu++)
    {
        cl_device_id device_id = NULL;      // compute device id
        cl_context context = NULL;          // compute context
        cl_command_queue commands = NULL;   // compute command queue
        cl_program program = NULL;          // compute program
        cl_kernel kernel = NULL;            // compute kernel
        cl_mem ImageInput = NULL;           // device memory used for the input array
        cl_mem ImageOutput = NULL;          // device memory used for the output array
        char *KernelSource = NULL;          // kernel source text, owned by this iteration
        size_t local = 0;                   // work-group size reported for the kernel
        size_t workGroup;
        const size_t *workGroupArg;
        clock_t TimeStartSolve;
        clock_t TimeStartKernel;
        clock_t TimeFinishKernel;
        clock_t TimeFinishSolve;
        int TimeGPU;
        int TimeKernel;
        int err;                            // error code returned from api calls

        // Assume failure until the iteration reaches its end.
        status = EXIT_FAILURE;

        // Start of the whole run on this device
        TimeStartSolve = clock();

        // Connect to a compute device: gpu == 0 selects the CPU, gpu == 1 the GPU
        err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to create a device group!\n");
            goto release;
        }

        // Create a compute context
        context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
        if (!context)
        {
            printf("Error: Failed to create a compute context!\n");
            goto release;
        }

        // Create a command queue
        commands = clCreateCommandQueue(context, device_id, 0, &err);
        if (!commands)
        {
            printf("Error: Failed to create a command commands!\n");
            goto release;
        }

        // Create the input and output arrays in device memory for our calculation
        ImageInput = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_uchar) * GlobalWorkzise, NULL, NULL);
        ImageOutput = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_uchar) * GlobalWorkzise, NULL, NULL);
        if (!ImageInput || !ImageOutput)
        {
            printf("Error: Failed to allocate device memory!\n");
            goto release;
        }

        // Create the compute program from the kernel source file
        KernelSource = LoadFile2txt(cSourceFile);
        if (KernelSource == NULL)
        {
            printf("Error: Failed to load kernel source %s!\n", cSourceFile);
            goto release;
        }
        program = clCreateProgramWithSource(context, 1, (const char **) &KernelSource, NULL, &err);
        if (!program)
        {
            printf("Error: Failed to create compute program!\n");
            goto release;
        }

        // Build the program executable
        err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            size_t len;
            char buffer[2048] = "";   // stays an empty string if the log query fails
            printf("Error: Failed to build program executable!\n");
            clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
            printf("%s\n", buffer);
            goto release;
        }

        // Create the compute kernel in the program we wish to run
        kernel = clCreateKernel(program, "Image_Processing", &err);
        if (!kernel || err != CL_SUCCESS)
        {
            printf("Error: Failed to create compute kernel!\n");
            goto release;
        }

        // Set the arguments to our compute kernel
        err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &ImageInput);
        err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &ImageOutput);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to set kernel arguments! %d\n", err);
            goto release;
        }

        // Write our data set into the input array in device memory
        err = clEnqueueWriteBuffer(commands, ImageInput, CL_TRUE, 0, sizeof(cl_uchar) * GlobalWorkzise, monImage, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to write to source array!\n");
            goto release;
        }

        // Get the maximum work group size for executing the kernel on the device
        err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to retrieve kernel work group info! %d\n", err);
            goto release;
        }

        // The global size is only guaranteed to be a multiple of LocalWorkSize.
        // Use the reported size when it divides the global size; otherwise fall
        // back to LocalWorkSize if the device allows it, else let the runtime pick.
        workGroup = local;
        workGroupArg = &workGroup;
        if (local == 0 || GlobalWorkzise % local != 0)
        {
            if (local >= LocalWorkSize)
            {
                workGroup = LocalWorkSize;
            }
            else
            {
                workGroupArg = NULL;
            }
        }

        // Start of the kernel execution
        TimeStartKernel = clock();

        // Execute the kernel over the entire range of our 1d input data set
        err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &GlobalWorkzise, workGroupArg, 0, NULL, NULL);
        if (err)
        {
            printf("Error: Failed to execute kernel!\n");
            goto release;
        }

        // Wait for the command queue to get serviced before reading back results
        clFinish(commands);
        TimeFinishKernel = clock();

        // Read back the results from the device
        err = clEnqueueReadBuffer(commands, ImageOutput, CL_TRUE, 0, sizeof(cl_uchar) * GlobalWorkzise, imageTraitee, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to read output array! %d\n", err);
            goto release;
        }

        // End of the whole run on this device
        TimeFinishSolve = clock();
        TimeGPU = (int)(((TimeFinishSolve - TimeStartSolve) * 1e6) / CLOCKS_PER_SEC);
        TimeKernel = (int)(((TimeFinishKernel - TimeStartKernel) * 1e6) / CLOCKS_PER_SEC);

        printf("Vals pix imageTraitee:\n%d %d %d\n%d %d %d\n%d %d %d\n%d %d %d\n",
               imageTraitee[0], imageTraitee[1], imageTraitee[2], imageTraitee[3], imageTraitee[4], imageTraitee[5],
               imageTraitee[6], imageTraitee[7], imageTraitee[8], imageTraitee[9], imageTraitee[10], imageTraitee[11]);

        // Save the processed image as BMP and remember the timings of this device
        if (gpu == 1)
        {
            if (!SOIL_save_image("GPUProcessedImage.bmp", SOIL_SAVE_TYPE_BMP, width, height, 1, imageTraitee))
            {
                printf("Error: Failed to save GPUProcessedImage.bmp!\n");
            }
            TimeTotGPU = TimeGPU;
            TimeKernGPU = TimeKernel;
        }
        else
        {
            if (!SOIL_save_image("CPUProcessedImage.bmp", SOIL_SAVE_TYPE_BMP, width, height, 1, imageTraitee))
            {
                printf("Error: Failed to save CPUProcessedImage.bmp!\n");
            }
            TimeTotCPU = TimeGPU;
            TimeKernCPU = TimeKernel;
        }

        status = EXIT_SUCCESS;

release:
        // Shutdown and cleanup; reached on success and on every failure above,
        // so only release what was actually created.
        free(KernelSource);
        if (ImageInput)
        {
            clReleaseMemObject(ImageInput);
        }
        if (ImageOutput)
        {
            clReleaseMemObject(ImageOutput);
        }
        if (kernel)
        {
            clReleaseKernel(kernel);
        }
        if (program)
        {
            clReleaseProgram(program);
        }
        if (commands)
        {
            clReleaseCommandQueue(commands);
        }
        if (context)
        {
            clReleaseContext(context);
        }
        if (status != EXIT_SUCCESS)
        {
            break;
        }
    }

    if (status == EXIT_SUCCESS)
    {
        printf("Temps de réolution du programme sur GPU: %d [usec]\n", TimeTotGPU);
        printf("Temps de réolution du programme sur CPU: %d [usec]\n\n", TimeTotCPU);
        // A measured time of 0 would make the ratio a division by zero.
        if (TimeTotGPU != 0)
        {
            printf("La résulotion du programme sur GPU est environ %d fois plus rapide que sur CPU\n\n", TimeTotCPU / TimeTotGPU);
        }
        else
        {
            printf("Rapport CPU/GPU du programme non calculable (temps GPU nul)\n\n");
        }
        printf("Temps de réolution du noyau sur GPU: %d [usec]\n", TimeKernGPU);
        printf("Temps de réolution du noyau sur CPU: %d [usec]\n\n", TimeKernCPU);
        if (TimeKernGPU != 0)
        {
            printf("La résulotion du noyau sur GPU est environ %d fois plus rapide que sur CPU\n\n", TimeKernCPU / TimeKernGPU);
        }
        else
        {
            printf("Rapport CPU/GPU du noyau non calculable (temps GPU nul)\n\n");
        }
    }

    free(monImage);
    free(imageTraitee);
    return status;
}
////////////////////////////////////////////////////////////////////////////////
//////////////////////////////Helper functions//////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/*
 * Reads the whole file named by File into a freshly allocated buffer and
 * appends a terminating '\0' so the result can be used as a C string.
 *
 * File:    path of the file to read.
 * Returns: the buffer, which the caller must release with free(), or NULL
 *          (after printing a diagnostic) if the path is NULL, the file cannot
 *          be opened or sized, memory cannot be allocated, or the read is short.
 */
char * LoadFile2txt (const char *File)
{
    FILE *pFile;
    long lSize;
    size_t result;
    char *TXTBuffer;

    if (File == NULL)
    {
        printf("Fct LoadFile2txt: File error");
        return NULL;
    }

    // Binary mode: the byte count obtained from ftell must match what fread
    // delivers, which a text-mode stream does not guarantee on every platform.
    pFile = fopen(File, "rb");
    if (pFile == NULL)
    {
        printf("Fct LoadFile2txt: File error");
        return NULL;
    }

    // obtain file size:
    if (fseek(pFile, 0, SEEK_END) != 0 || (lSize = ftell(pFile)) < 0)
    {
        printf("Fct LoadFile2txt: File error");
        fclose(pFile);
        return NULL;
    }
    rewind(pFile);

    // allocate memory to contain the whole file plus the terminator:
    TXTBuffer = malloc((size_t)lSize + 1);
    if (TXTBuffer == NULL)
    {
        printf("Fct LoadFile2txt: Memory error");
        fclose(pFile);
        return NULL;
    }

    // copy the file into the buffer:
    result = fread(TXTBuffer, 1, (size_t)lSize, pFile);
    fclose(pFile);
    if (result != (size_t)lSize)
    {
        printf("Fct LoadFile2txt: Reading error");
        free(TXTBuffer);
        return NULL;
    }

    // terminate
    TXTBuffer[lSize] = '\0';
    return TXTBuffer;
}
Partager