/*
 * OpenCL image-processing benchmark: runs the "Image_Processing" kernel from
 * Image_Process.cl on the CPU device and then on the GPU device, saves both
 * results as BMP files and prints the timings of the two runs.
 * (The collapsed line-number gutter "1 ... 330" and a stray "|" from the
 * original listing stood here.)
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/types.h>
#include <OpenCL/opencl.h>
#include <time.h>
#include "SOIL.h"
////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Main Code//////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
int main (int argc, const char * argv[])
{
//Declar Functions
char * LoadFile2txt(const char *File);
//Declar Variables
int err; // error code returned from api calls
int gpu;
int width;
int height;
int channels;
int TimeTotGPU;
int TimeKernGPU;
int TimeTotCPU;
int TimeKernCPU;
// int RunLevel;
//GLuint *monImage;
const char* cSourceFile = "Image_Process.cl";
char filename[]= "Test3.bmp";
char *KernelSource;
size_t local; // local domain size for our calculation
cl_device_id device_id; // compute device id
cl_context context; // compute context
cl_command_queue commands; // compute command queue
cl_program program; // compute program
cl_kernel kernel; // compute kernel
cl_mem ImageInput; // device memory used for the input array
cl_mem ImageOutput; // device memory used for the output array
//cl_mem nbrPixel;
unsigned char *monImage = SOIL_load_image(filename,&width, &height, &channels, SOIL_LOAD_L);
unsigned char *imageTraitee;
channels=1;
int nbr_val_image = width * height * channels;
printf("Image width: %d \n", width);
printf("Image height: %d \n", height);
printf("Image channels: %d \n", channels);
printf("nbr_val_image de: %d \n", nbr_val_image);
printf("Vals pix monImage:\n%d %d %d\n%d %d %d\n%d %d %d\n%d %d %d\n\n",
monImage[0], monImage[1], monImage[2], monImage[3], monImage[4], monImage[5],
monImage[6], monImage[7], monImage[8], monImage[9], monImage[10], monImage[11]);
// Ajuste le nombre de valeurs de l'image au multiple de 256 au-dessus pour la création de la mémoire tampon
//
size_t LocalWorkSize = 256;
size_t GlobalWorkzise = ceil((double)nbr_val_image/(double)LocalWorkSize)*LocalWorkSize;
monImage = (void *)realloc(monImage,sizeof(cl_uchar)*GlobalWorkzise);
imageTraitee = (void *)malloc(sizeof(cl_uchar)*GlobalWorkzise);
for(gpu=0;gpu<2;gpu++)
{
// Prise de temps début de résolution GPU
clock_t TimeStartSolve = clock ();
// Connect to a compute device
//
err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); // if gpu=0 : solving on CPU, if gpu=1 : solving on GPU
if (err != CL_SUCCESS)
{
printf("Error: Failed to create a device group!\n");
return EXIT_FAILURE;
}
// Create a compute context
//
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
if (!context)
{
printf("Error: Failed to create a compute context!\n");
return EXIT_FAILURE;
}
// Create a command commands
//
commands = clCreateCommandQueue(context, device_id, 0, &err);
if (!commands)
{
printf("Error: Failed to create a command commands!\n");
return EXIT_FAILURE;
}
// Create the input and output arrays in device memory for our calculation
//
ImageInput = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_uchar) * GlobalWorkzise, NULL, NULL);
//nbrPixel = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * GlobalWorkzise, NULL, NULL);
ImageOutput = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_uchar) * GlobalWorkzise, NULL, NULL);
if (!ImageInput || !ImageOutput)
{
printf("Error: Failed to allocate device memory!\n");
exit(1);
}
// Create the compute program from the source buffer
//
KernelSource = LoadFile2txt (cSourceFile);
program = clCreateProgramWithSource(context, 1, (const char **) &KernelSource, NULL, &err);
if (!program)
{
printf("Error: Failed to create compute program!\n");
return EXIT_FAILURE;
}
// Build the program executable
//
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
size_t len;
char buffer[2048];
printf("Error: Failed to build program executable!\n");
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
printf("%s\n", buffer);
exit(1);
}
// Create the compute kernel in the program we wish to run
//
kernel = clCreateKernel(program, "Image_Processing", &err);
if (!kernel || err != CL_SUCCESS)
{
printf("Error: Failed to create compute kernel!\n");
exit(1);
}
// Set the arguments to our compute kernel
//
err = 0;
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &ImageInput);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &ImageOutput);
//err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &nbrPixel);
if (err != CL_SUCCESS)
{
printf("Error: Failed to set kernel arguments! %d\n", err);
exit(1);
}
// Write our data set into the input array in device memory
//
err = clEnqueueWriteBuffer(commands, ImageInput, CL_TRUE, 0, sizeof(cl_uchar) * GlobalWorkzise, monImage, 0, NULL, NULL);
//err |= clEnqueueWriteBuffer(commands, nbrPixel , CL_TRUE, 0, sizeof(int) * GlobalWorkzise, nbr_val_image, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to write to source array!\n");
exit(1);
}
// Get the maximum work group size for executing the kernel on the device
//
err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to retrieve kernel work group info! %d\n", err);
exit(1);
}
//printf("local = %d\n", (int)local);
// Prise de temps début de résolution du kernel
clock_t TimeStartKernel = clock ();
// Execute the kernel over the entire range of our 1d input data set
// using the maximum number of work group items for this device
//
err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &GlobalWorkzise, &local, 0, NULL, NULL);
if (err)
{
printf("Error: Failed to execute kernel!\n");
return EXIT_FAILURE;
}
// Wait for the command commands to get serviced before reading back results
//
clFinish(commands);
clock_t TimeFinishKernel = clock ();
// Read back the results from the device to verify the output
//
err = clEnqueueReadBuffer(commands, ImageOutput, CL_TRUE, 0, sizeof(cl_uchar) * GlobalWorkzise, imageTraitee, 0, NULL, NULL );
if (err != CL_SUCCESS)
{
printf("Error: Failed to read output array! %d\n", err);
exit(1);
}
// Prise de temps fin résolution du kernel
clock_t TimeFinishSolve = clock ();
int TimeGPU = (((TimeFinishSolve - TimeStartSolve) *1e6) / CLOCKS_PER_SEC);
int TimeKernel = (((TimeFinishKernel - TimeStartKernel)*1e6) / CLOCKS_PER_SEC);
printf("Vals pix imageTraitee:\n%d %d %d\n%d %d %d\n%d %d %d\n%d %d %d\n",
imageTraitee[0], imageTraitee[1], imageTraitee[2], imageTraitee[3], imageTraitee[4], imageTraitee[5],
imageTraitee[6], imageTraitee[7], imageTraitee[8], imageTraitee[9], imageTraitee[10], imageTraitee[11]);
// Enregistrement de l'image traitée en BMP
if(gpu==1)
{
err = SOIL_save_image("GPUProcessedImage.bmp", SOIL_SAVE_TYPE_BMP, width, height, 1, imageTraitee);
TimeTotGPU = TimeGPU;
TimeKernGPU = TimeKernel;
}
else
{
err = SOIL_save_image("CPUProcessedImage.bmp", SOIL_SAVE_TYPE_BMP, width, height, 1, imageTraitee);
TimeTotCPU = TimeGPU;
TimeKernCPU = TimeKernel;
}
// Shutdown and cleanup
clReleaseMemObject(ImageInput);
clReleaseMemObject(ImageOutput);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(commands);
clReleaseContext(context);
}
printf("Temps de réolution du programme sur GPU: %d [usec]\n", TimeTotGPU);
printf("Temps de réolution du programme sur CPU: %d [usec]\n\n", TimeTotCPU);
printf("La résulotion du programme sur GPU est environ %d fois plus rapide que sur CPU\n\n", TimeTotCPU / TimeTotGPU);
printf("Temps de réolution du noyau sur GPU: %d [usec]\n", TimeKernGPU);
printf("Temps de réolution du noyau sur CPU: %d [usec]\n\n", TimeKernCPU);
printf("La résulotion du noyau sur GPU est environ %d fois plus rapide que sur CPU\n\n", TimeKernCPU / TimeKernGPU);
free(monImage);
free(imageTraitee);
return 0;
}
////////////////////////////////////////////////////////////////////////////////
//////////////////////////////Annexe functions//////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Reads the whole file named by File into a newly allocated buffer.
//
// The returned text is NUL-terminated so it can be handed to APIs that expect
// a C string. The caller owns the buffer and must free() it.
// Returns NULL (after printing a message) if the file cannot be opened, sized,
// or read, or if memory cannot be allocated.
char * LoadFile2txt (const char *File)
{
    FILE * pFile;
    long lSize;
    size_t result;
    char * TXTBuffer;

    // Binary mode: the byte count from ftell must match what fread delivers
    pFile = fopen (File, "rb");
    if (pFile == NULL)
    {
        printf("Fct LoadFile2txt: File error");
        return NULL;
    }

    // obtain file size:
    if (fseek (pFile, 0, SEEK_END) != 0 || (lSize = ftell (pFile)) < 0)
    {
        printf("Fct LoadFile2txt: File error");
        fclose (pFile);
        return NULL;
    }
    rewind (pFile);

    // allocate memory to contain the whole file plus the terminating NUL:
    TXTBuffer = malloc ((size_t)lSize + 1);
    if (TXTBuffer == NULL)
    {
        printf("Fct LoadFile2txt: Memory error");
        fclose (pFile);
        return NULL;
    }

    // copy the file into the buffer:
    result = fread (TXTBuffer, 1, (size_t)lSize, pFile);
    fclose (pFile);
    if (result != (size_t)lSize)
    {
        printf("Fct LoadFile2txt: Reading error");
        free (TXTBuffer);
        return NULL;
    }

    // terminate
    TXTBuffer[lSize] = '\0';
    return TXTBuffer;
}
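/* "Partager" ("Share"): appears to be a page link carried over from the web listing; not part of the program. */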