#include "globals.h"
#include "sha.h"
#include "scrypt.h"

// Kernel `search`: one work-item evaluates one candidate, identified by its
// global id, and reports it via SETFOUND(gid) when the final hash word is
// <= `target`.
//
// Visible data flow:
//   1. A 4-word block (input[4].x, input[4].y, input[4].z, gid) is hashed on
//      top of the supplied midstate (midstate0 / midstate16).
//   2. Two further SHA256 invocations, keyed by that result XOR sK[0] and
//      XOR sK[1], produce the `ostate*` and `tstate*` values.
//   3. A 4-iteration loop fills X[0..7], which is then passed to
//      scrypt_core() together with `padcache`.
//   4. X is hashed again, 64 RND steps with the fixedW* tables are applied,
//      and a last SHA256 call yields the word compared against `target`.
// NOTE(review): this looks like the usual scrypt proof-of-work layout
// (PBKDF2-HMAC-SHA256 -> scrypt core -> PBKDF2-HMAC-SHA256) -- confirm
// against the definitions in sha.h / scrypt.h.
//
// Parameters:
//   input      - read at indices 0..4 (input[0..3] as a full block,
//                input[4].x/.y/.z as the first three words of `data`).
//   output     - not referenced directly in this block; presumably written
//                by the SETFOUND macro -- verify against its definition.
//   padcache   - forwarded unchanged to scrypt_core().
//   midstate0,
//   midstate16 - initial values of pad0 / pad1.
//   target     - upper bound (inclusive) for EndianSwapa(ostate14).
//
// The work-group size is fixed to (WORKSIZE, 1, 1) by the attribute below.
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uint4 *restrict input,
volatile __global uint *restrict output, __global uint4 *restrict padcache, const uint4 midstate0, const uint4 midstate16, const uint target){

	// gid doubles as the fourth word of `data` (see FOURdeclare(data, ...)).
	uint gid = get_global_id(0);
	// Eight uint4 words handed to scrypt_core() after the fill loop below.
	uint4 X[8];
	// NOTE(review): tmp0/tmp1 and pass0/pass1/pass2 are used throughout this
	// kernel, but their only visible declarations are the commented-out lines
	// below -- confirm that they are provided elsewhere (e.g. by a header).
	//uint4 tmp0, tmp1;
	//uint4 pass0, pass1, pass2;
	//uint8 SHA256(uint8 restrict digest, uint16 restrict block){
	//uint8 SHA256(uint *restrict digest, uint *restrict block){
	// NOTE(review): none of the following arrays is indexed anywhere in this
	// block. Whether the FOUR* macros reference them depends on macro
	// definitions that are not visible here -- verify before removing.
	uint tstate[8] = {0, 0, 0, 0, 0, 0, 0, 0};
	uint ostate[8] = {0, 0, 0, 0, 0, 0, 0, 0};
	uint tstatebak[8];
	uint tmp[16];
	uint pad[16];
	uint data[16];





	// Judging by later uses of tstate01..tstate04 and ostate14, FOURdeclare(n, ...)
	// presumably declares four scalars n1..n4 -- confirm against the macro.
	// The two halves of each state start out as zero.
	FOURdeclare(tstate0, 0, 0, 0, 0);
	FOURdeclare(tstate1, 0, 0, 0, 0);
	FOURdeclare(ostate0, 0, 0, 0, 0);
	FOURdeclare(ostate1, 0, 0, 0, 0);

	// Per-work-item block: three words from input[4] plus gid as the fourth.
	FOURdeclare(data, input[4].x,input[4].y,input[4].z,gid);
	// pad0/pad1 start from the caller-supplied midstate.
	FOURdeclare(pad0, midstate0.x, midstate0.y, midstate0.z, midstate0.w);
	FOURdeclare(pad1, midstate16.x, midstate16.y, midstate16.z, midstate16.w);

	// Pack the scalar groups into the uint4 working values used by SHA256.
	FOURtovec(pass0, pad0);
	FOURtovec(pass1, pad1);
	FOURtovec(pass2, data);

	// Hash `data` followed by (sK[2],0,0,0), zeros and (0,0,0,sK[3]) on top
	// of the midstate held in pass0/pass1. The trailing argument is a flag
	// whose meaning is defined together with SHA256 (not visible here).
	SHA256(&pass0,&pass1, pass2, (uint4)(sK[2],0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, sK[3]), 1);

	// Keep the result in pad0/pad1; it is reused for the second keyed hash.
	FOURfromvec(pad0, pass0);
	FOURfromvec(pad1, pass1);

	// First keyed block: result XOR sK[0].
	// NOTE(review): presumably an HMAC pad constant -- confirm.
	tmp0 = pass0^sK[0];
	tmp1 = pass1^sK[0];


	// ostate = SHA256 over (tmp0, tmp1, sK[0], sK[0]) starting from the
	// zero-initialised ostate0/ostate1 values, with flag 0.
	FOURtovec(pass0, ostate0);
	FOURtovec(pass1, ostate1);
	SHA256(&pass0, &pass1, tmp0, tmp1, sK[0], sK[0], 0);
    FOURfromvec(ostate0, pass0);
    FOURfromvec(ostate1, pass1);

	// Second keyed block: the same saved result XOR sK[1], hashed from the
	// zero-initialised tstate0/tstate1 values.
	FOURtovec(pass0, pad0);
	FOURtovec(pass1, pad1);
	tmp0 = pass0^sK[1];
	tmp1 = pass1^sK[1];
	FOURtovec(pass0, tstate0);
	FOURtovec(pass1, tstate1);
	SHA256(&pass0, &pass1, tmp0, tmp1, sK[1], sK[1], 0);
	FOURfromvec(tstate0, pass0);
	FOURfromvec(tstate1, pass1);

	// tmp0/tmp1 retain this tstate value; it is the starting state for the
	// two SHA256 calls over X after scrypt_core().
	tmp0 = pass0;
	tmp1 = pass1;

	// Continue the tstate hash with the first four input words.
	SHA256(&pass0, &pass1, input[0],input[1],input[2],input[3], 1);
	FOURfromvec(tstate0, pass0);
	FOURfromvec(tstate1, pass1);


	// Fill X two words per iteration: X[2i], X[2i+1] for i = 0..3.
	for (uint i=0; i<4; i++){
		// Every iteration restarts from the same tstate value.
		FOURcopy(pad0, tstate0);
		FOURcopy(pad1, tstate1);

		//FOURtovec( X[(i<<1) ], ostate0);
		//FOURtovec( X[(i<<1)+1], ostate1);
		// Seed this pair of X words with ostate; the second SHA256 call
		// below updates them in place.
		FOURtovec(pass0, ostate0);
		FOURtovec(pass1, ostate1);
		X[(i<<1) ] = pass0;
		X[(i<<1)+1] = pass1;

		// Hash `data` plus a block whose first word is i+1.
		// NOTE(review): i+1 is presumably the 1-based PBKDF2 block index --
		// confirm.
		FOURtovec(pass0, pad0);
		FOURtovec(pass1, pad1);
		FOURtovec(pass2, data);
		SHA256(&pass0, &pass1, pass2, (uint4)(i+1,sK[2],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, sK[4]), 1);
		FOURfromvec(pad0, pass0);
		FOURfromvec(pad1, pass1);
		// Fold that result into X[2i] / X[2i+1], which act as the state.
		SHA256(X+(i<<1),X+(i<<1)+1, pass0, pass1, (uint4)(sK[2], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, sK[5]), 1);
	}


	// Transform X in place; padcache is forwarded to the helper.
	scrypt_core(X,padcache);

	// Absorb all eight words of X into the state kept in tmp0/tmp1.
	SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3], 1);
	SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7], 1);

	// Unpack into scalars so the RND macro can work on individual words.
	FOURfromvec(tstate0, tmp0);
	FOURfromvec(tstate1, tmp1);

// Short aliases for the eight scalar state words used by RND below.
#define A tstate01
#define B tstate02
#define C tstate03
#define D tstate04
#define E tstate11
#define F tstate12
#define G tstate13
#define H tstate14

// 8 iterations x 8 RND calls = 64 steps. The argument order rotates by one
// position per call, so no values are shuffled between steps; each call takes
// its last operand from one of the fixedWa..fixedWh tables, indexed by i.
// NOTE(review): presumably SHA-256 compression rounds over a fixed final
// block -- confirm against the RND / fixedW* definitions.
#pragma unroll UNROLL_FACTOR
	for(uint i=0; i<8; i++){
		RND(A,B,C,D,E,F,G,H, fixedWa[i]);
		RND(H,A,B,C,D,E,F,G, fixedWb[i]);
		RND(G,H,A,B,C,D,E,F, fixedWc[i]);
		RND(F,G,H,A,B,C,D,E, fixedWd[i]);
		RND(E,F,G,H,A,B,C,D, fixedWe[i]);
		RND(D,E,F,G,H,A,B,C, fixedWf[i]);
		RND(C,D,E,F,G,H,A,B, fixedWg[i]);
		RND(B,C,D,E,F,G,H,A, fixedWh[i]);
	}

#undef A
#undef B
#undef C
#undef D
#undef E
#undef F
#undef G
#undef H

	// Repack the scalars after the RND steps.
	FOURtovec(pass0, tstate0);
	FOURtovec(pass1, tstate1);

	// Add the post-RND words onto the pre-RND state still held in tmp0/tmp1.
	tmp0 += pass0;
	tmp1 += pass1;

	// Final hash over (tmp0, tmp1) starting from ostate. Note the variable
	// roles here: pass2 carries ostate0 and pass0 carries ostate1.
	FOURtovec(pass2, ostate0);
	FOURtovec(pass0, ostate1);
	SHA256(&pass2,&pass0, tmp0, tmp1, (uint4)(sK[2], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, sK[5]), 1);

	// Only the second half is unpacked: the check below reads ostate14 alone.
	FOURfromvec(ostate1, pass0);

	// Candidate is accepted when the converted last word is <= target.
	bool result = (EndianSwapa(ostate14) <= target);
	if (result)
		SETFOUND(gid);
}
