









/* Index of the slot in `output` that holds the running count of stored nonces. */
#define FOUND (0xFF)
/*
 * Append Xnonce to the `output` buffer of the enclosing kernel.
 *
 * The slot index is the previous value of output[FOUND], which is then
 * incremented. The increment is done with atomic_inc() so that two work-items
 * reporting a nonce at the same time each claim a distinct slot; the plain
 * `output[FOUND]++` read-modify-write could hand the same index to both and
 * lose one of the results.
 *
 * atomic_inc() on __global uint is a core built-in from OpenCL 1.1 onwards
 * (OpenCL 1.0 devices would need cl_khr_global_int32_base_atomics / atom_inc).
 * The argument and the whole expansion are parenthesized so the macro is safe
 * to use with arbitrary expressions.
 */
#define SETFOUND(Xnonce) (output[atomic_inc(&output[FOUND])] = (Xnonce))

/*
 * search - hash one candidate nonce per work-item and report it if it meets
 * the target.
 *
 * input      : read as five uint4 values; input[0..3] are fed to SHA256() as a
 *              16-word block, input[4].xyz plus the work-item's global id form
 *              the first four words of the `data` block.
 * output     : result buffer written through SETFOUND() when a nonce qualifies.
 * padcache   : passed straight through to scrypt_core().
 * midstate0,
 * midstate16 : the eight words used to seed the first SHA256() call.
 * target     : the nonce is reported when EndianSwapa(ostate[7]) <= target.
 *
 * NOTE(review): the sequence (two keyed states derived with K[82]/K[83], four
 * counter-indexed blocks fed to scrypt_core(), then a final keyed hash) looks
 * like scrypt's PBKDF2-HMAC-SHA256 wrapping, with K[82]/K[83] presumably the
 * inner/outer pad constants -- confirm against the K table and the SHA256
 * helpers, which are defined outside this block.
 */
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uint4 * restrict input,
volatile __global uint*restrict output, __global uint4*restrict padcache,
const uint4 midstate0, const uint4 midstate16, const uint target){

	const uint gid = get_global_id(0);

	uint4 X[8];
	uint tmp[16];
	uint tstatebak[8];
	uint tstate[8] = {0};
	uint ostate[8] = {0};

	/* Seed state from the supplied midstate; upper half is filled with K[82]. */
	uint pad[16] = {
		midstate0.x,  midstate0.y,  midstate0.z,  midstate0.w,
		midstate16.x, midstate16.y, midstate16.z, midstate16.w,
		K[82], K[82], K[82], K[82], K[82], K[82], K[82], K[82]
	};

	/* Last input vector (x, y, z) followed by this work-item's nonce. */
	uint data[16] = {
		input[4].x, input[4].y, input[4].z, gid,
		K[84], 0x0U, 0x0U, 0x0U,
		0x0U,  0x0U, 0x0U, 0x0U,
		0x0U,  0x0U, 0x0U, K[86]
	};

	SHA256(pad, data);

	/* Keep the digest in tmp, then build the K[82]-masked block for ostate. */
	for (uint w = 0; w < 8; w++) {
		tmp[w]     = pad[w];
		pad[w]     = tmp[w] ^ K[82];
		pad[w + 8] = K[82];
	}
	SHA256_fresh(ostate, pad);

	/* Same digest, this time masked with K[83], for tstate. */
	for (uint w = 0; w < 8; w++) {
		pad[w]     = tmp[w] ^ K[83];
		pad[w + 8] = K[83];
	}
	SHA256_fresh(tstate, pad);

	/* tstate is advanced below; the untouched copy is needed after scrypt_core(). */
	for (uint w = 0; w < 8; w++) {
		tstatebak[w] = tstate[w];
	}

	/* Flatten input[0..3] into the 16-word block tmp. */
	for (uint v = 0; v < 4; v++) {
		const uint4 vec = input[v];
		tmp[4 * v + 0] = vec.x;
		tmp[4 * v + 1] = vec.y;
		tmp[4 * v + 2] = vec.z;
		tmp[4 * v + 3] = vec.w;
	}
	SHA256(tstate, tmp);

	/* Upper half of pad is written once here and not reassigned in the loop below. */
	pad[8] = K[84];
	for (uint w = 9; w < 15; w++) {
		pad[w] = 0x0;
	}
	pad[15] = K[88];

	/* data[4] becomes a counter (incremented before each use); data[0..3] are kept. */
	data[4] = 0x0;
	data[5] = K[84];
	for (uint w = 6; w < 15; w++) {
		data[w] = 0x0U;
	}
	data[15] = K[87];

	/* Produce the eight uint4 values of X, two per counter value. */
	for (uint blk = 0; blk < 4; blk++) {
		for (uint w = 0; w < 8; w++) {
			pad[w] = tstate[w];
			tmp[w] = ostate[w];
		}
		data[4]++;
		SHA256(pad, data);
		SHA256(tmp, pad);

		uintp_to_uint4p((X + 2 * blk), tmp);
		uintp_to_uint4p((X + 2 * blk + 1), &tmp[4]);
	}

	scrypt_core(X, padcache);

	/* Absorb X back into the saved state, 16 words (four uint4) at a time. */
	for (uint half = 0; half < 2; half++) {
		for (uint q = 0; q < 4; q++) {
			uint4_to_uintp((tmp + 4 * q), X[4 * half + q]);
		}
		SHA256(tstatebak, tmp);
	}

	SHA256_fixed(tstatebak, &tstatebak[4]);

	/* Final block: resulting state followed by the K[84] / K[88] tail words. */
	for (uint w = 0; w < 8; w++) {
		tmp[w] = tstatebak[w];
	}
	tmp[8] = K[84];
	for (uint w = 9; w < 15; w++) {
		tmp[w] = 0x0U;
	}
	tmp[15] = K[88];

	SHA256(ostate, tmp);

	/* Report the nonce when the byte-swapped last state word is within target. */
	if (EndianSwapa(ostate[7]) <= target) {
		SETFOUND(gid);
	}

}


