// Element-wise subtraction kernel over a 1-D index space.
//
// Each work-item handles exactly one element:
//     cl_output[i] = cl_input1[i] - cl_input2[i]
// where i is the work-item's global id in dimension 0.
//
// cl_input1  - minuend values (read only by this kernel)
// cl_input2  - subtrahend values (read only by this kernel)
// cl_output  - receives the per-element difference
//
// NOTE(review): the kernel takes no element count, so it performs no bounds
// check; the host is presumably expected to enqueue a global work size that
// does not exceed the length of any of the three buffers - confirm at the
// call site.
__kernel void AlphaKernel(__global float *cl_input1, __global float *cl_input2, __global float *cl_output)
{
	// Use the global id directly. The previous index was
	// get_num_groups(0) * get_group_id(0) + get_local_id(0), which scales the
	// group id by the number of groups rather than by the work-group size and
	// is therefore only correct when those two values happen to be equal.
	// Otherwise work-items overlap on some elements, skip others, or index
	// outside the buffers.
	const size_t i = get_global_id(0);

	cl_output[i] = cl_input1[i] - cl_input2[i];
}
