#include "globals.h"
#include "sha.h"
#include "scrypt.h"

/*
 * search - kernel entry point; each work-item evaluates one value `gid`.
 *
 * Parameters
 *   input      read-only record in global memory. Fields c, d and e go into
 *              the first message block; field f is added to the global id
 *              when GOFFSET is not defined. The pointer is also handed to
 *              Scrypt_Key.
 *   output     result buffer. It is not referenced by name in this body;
 *              NOTE(review): presumably written by the SETFOUND macro -
 *              confirm against its definition.
 *   padcache   global scratch buffer of uint16 elements. This work-item
 *              addresses elements 2*padx and 2*padx + 1, where
 *              padx = gid % CONCURRENT_THREADS; the whole buffer is also
 *              passed to Scrypt_Key and scrypt_core.
 *   midstate0, midstate16
 *              their eight components together form the state vector that
 *              is passed as the first argument of the first SHA256 call.
 *   target     threshold compared against EndianSwapa(ostate.s7).
 *
 * All helpers used here (SHA256, Int_SHA, Scrypt_Key, Shittify, scrypt_core,
 * Final_SHAd, EndianSwapa, SETFOUND) and the constants SK00, SK01 and ZERO
 * are defined outside this file; the comments below describe only what is
 * passed to them, not what they do internally.
 */
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(const __global input_t *input,
                     volatile __global uint *output,
                     __global uint16 *padcache,
                     volatile uint4 midstate0,
                     volatile uint4 midstate16,
                     const uint target)
{
	/* Value handled by this work-item. */
#ifdef GOFFSET
	uint gid = get_global_id(0);
#else
	uint gid = input->f + get_global_id(0);
#endif

	/*
	 * Both running states start from the same eight words, which equal the
	 * SHA-256 initial hash value H(0) given in FIPS 180-4.
	 */
	uint8 ostate = {0x6a09e667U, 0xBB67AE85U, 0x3C6EF372U, 0xA54FF53AU,
	                0x510e527fU, 0x9b05688cU, 0x1F83D9ABU, 0x5BE0CD19U};
	uint8 tstate = ostate;

	/*
	 * Operands of the first SHA256 call:
	 *   tmpa - input->c, input->d, input->e, gid, SK00, then zeros
	 *   tmpb - the eight midstate words (midstate0 followed by midstate16)
	 *   tmpc - zeros with SK01 in the last lane
	 */
	uint8 tmpa = {input->c, input->d, input->e, gid,
	              SK00, ZERO, ZERO, ZERO};
	uint8 tmpb = {midstate0.s0, midstate0.s1, midstate0.s2, midstate0.s3,
	              midstate16.s0, midstate16.s1, midstate16.s2, midstate16.s3};
	uint8 tmpc = {ZERO, ZERO, ZERO, ZERO, ZERO, ZERO, ZERO, SK01};

	/*
	 * padx selects this work-item's slot; pady and padz index the two
	 * consecutive uint16 elements of that slot.
	 */
	uint padx = gid % CONCURRENT_THREADS;
	uint pady = padx << 1;
	uint padz = pady + 1;

	/* tmpb is passed by address here and then by value to Int_SHA. */
	SHA256(&tmpb, &tmpa, &tmpc);
	Int_SHA(tmpb, &tstate, &ostate);

	/* Scrypt_Key receives copies of both states plus the slot base index. */
	Scrypt_Key(padcache, pady, input, gid, tstate, ostate);

	/*
	 * scrypt_core is bracketed by two Shittify calls on the slot's two
	 * elements, with last argument 1 before and 0 after.
	 * NOTE(review): the flag presumably selects the forward / inverse form
	 * of a reordering - confirm in the header that defines Shittify.
	 */
	Shittify(&padcache[pady], &padcache[padz], 1);
	scrypt_core(padcache, padx);
	Shittify(&padcache[pady], &padcache[padz], 0);

	/*
	 * Feed both slot elements into tstate, first element pady and then
	 * element padz; each time the .lo half is loaded into tmpa and the .hi
	 * half into tmpb before the SHA256 call.
	 */
	tmpa = padcache[pady].lo;
	tmpb = padcache[pady].hi;
	SHA256(&tstate, &tmpa, &tmpb);

	tmpa = padcache[padz].lo;
	tmpb = padcache[padz].hi;
	SHA256(&tstate, &tmpa, &tmpb);

	Final_SHAd(&tstate, &ostate);

	/*
	 * Report gid when the converted last lane of ostate does not exceed
	 * target. The statement is intentionally left unbraced, exactly as a
	 * single SETFOUND(gid) statement.
	 * NOTE(review): confirm SETFOUND expands to a single statement.
	 */
	if (EndianSwapa((ostate.s7)) <= target)
		SETFOUND(gid);
}