#include "globals.h"
#include "sha.h"
#include "salsa.h"
#include "shittify.h"
#include "scrypt.h"

/*
 * search - OpenCL kernel entry point. Each work-item processes one candidate,
 * identified by its global id (gid), which is fed into the hashed message as
 * the word following input[16..18].
 *
 * Visible flow of this function:
 *   1. Seed pad00..pad07 from midstate0/midstate16 and run SHA256 over
 *      input[16], input[17], input[18], gid and the SK02/SK03 constants.
 *   2. Derive two further states (ostate*, tstate*) from pad* XORed with
 *      SK00 and SK01 respectively, then absorb input[0..15] into tstate*.
 *   3. Produce four 8-word outputs (counter lnum0 = 1..4) and pack them
 *      into X[0] and X[1].
 *   4. Run shittify(X), scrypt_core(X, padcache), unshittify(X).
 *   5. Absorb X[0] and X[1] into the saved tstatebak* state, run 64 RND
 *      steps with the fixedW* tables, and hash the result once more into
 *      ostate*.
 *   6. Report gid through SETFOUND when EndianSwapa(ostate07) <= target.
 *
 * NOTE(review): steps 1-3 and 5 look like PBKDF2-HMAC-SHA256 around an
 * scrypt core, with SK00/SK01 acting as the HMAC key pads and SK02..SK05
 * as SHA-256 padding/length words - confirm against the headers, since
 * none of those definitions are visible in this file.
 *
 * Parameters:
 *   input      - read at indices 0..18 by this function.
 *   output     - not referenced directly here; presumably written by the
 *                SETFOUND macro - TODO confirm.
 *   padcache   - passed through unchanged to scrypt_core.
 *   midstate0  - supplies the initial values of pad00..pad03.
 *   midstate16 - supplies the initial values of pad04..pad07.
 *   target     - upper bound compared against EndianSwapa(ostate07).
 */
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
//__kernel void search(__global const uint4 *restrict input,
__kernel void search(const __global uint *restrict input,
volatile __global uint *restrict output, __global uint16 *restrict padcache, const uint4 midstate0, const uint4 midstate16, const uint target){
//volatile __global uint *restrict output, volatile __global uint16 *restrict padcache, const uint4 midstate0, const uint4 midstate16, const uint target){
	// Index of this work-item in dimension 0; used as a message word and as the reported result.
	uint gid = get_global_id(0);

	// Two 16-word vectors (32 words total) filled by the loop below and handed to scrypt_core.
	uint16 X[2];
//	uint8 data[5];

	// The fixedWa..fixedWh tables used by the RND loop near the end are not declared in this
	// function; the disabled local copies are kept below for reference.
	// NOTE(review): presumably they now come from a header or from DecAllSK - confirm.
//	volatile uint fixedWa[8] = {0x428a2f99,0xd807aa98,0xf59b89c2,0xb707775c,0xad87a3ea,0xc91b1417,0xe64fb6a2,0xe0a1adbe};
//	const uint fixedWb[8] = {0xf1374491,0x12835b01,0x73924787,0x0468c23f,0xbcb1d3a3,0xc359dce1,0xe84d923a,0x7c728e11};
//	volatile uint fixedWc[8] = {0xb5c0fbcf,0x243185be,0x23c6886e,0xe7e72b4c,0x7b993186,0xa83253a7,0xe93a5730,0x511c78e4};
//	const uint fixedWd[8] = {0xe9b5dba5,0x550c7dc3,0xa42ca65c,0x49e1f1a2,0x562b9420,0x3b13c12d,0x09837686,0x315b45bd};
//	volatile uint fixedWe[8] = {0x3956c25b,0x72be5d74,0x15ed3627,0x4b99c816,0xbff3ca0c,0x9d3d725d,0x078ff753,0xfca71413};
//	const uint fixedWf[8] = {0x59f111f1,0x80deb1fe,0x4d6edcbf,0x926d1570,0xda4b0c23,0xd9031a84,0x29833341,0xea28f96a};
//	volatile uint fixedWg[8] = {0x923f82a4,0x9bdc06a7,0xe28217fc,0xaa0fc072,0x6cd8711a,0xb1a03340,0xd5de0b7e,0x79703128};
//	const uint fixedWh[8] = {0xab1c5ed5,0xc19bf794,0xef02488f,0xadb36e2c,0x8f337caa,0x16f58012,0x6948ccf4,0x4e1ef848};

	// Macro defined outside this file.
	// NOTE(review): presumably declares the SK00..SK05 constants used below - confirm.
	DecAllSK
	// Named 0/1 values passed to SHA256 both as message words (zero) and as the trailing flag.
	// `one` is also reused at the very end to hold the target comparison result.
	bool zero = 0;
	bool one = 1;
	// Counter fed into the message in the loop below; it is incremented before each use.
	uint lnum0 = 0; //0
	// State derived from pad ^ SK01; later also absorbs input[0..15].
	uint tstate00; //0
	uint tstate01; //0
	uint tstate02; //0
	uint tstate03; //0
	uint tstate04; //0
	uint tstate05; //0
	uint tstate06; //0
	uint tstate07; //0
	// State derived from pad ^ SK00; also receives the final hash.
	uint ostate00; //0
	uint ostate01; //0
	uint ostate02; //0
	uint ostate03; //0
	uint ostate04; //0
	uint ostate05; //0
	uint ostate06; //0
	uint ostate07; //0
	// Snapshot of tstate taken before input[0..15] is absorbed; used after scrypt_core.
	uint tstatebak00; //0
	uint tstatebak01; //0
	uint tstatebak02; //0
	uint tstatebak03; //0
	uint tstatebak04; //0
	uint tstatebak05; //0
	uint tstatebak06; //0
	uint tstatebak07; //0
	// Scratch words: first the XORed pad values, later a working copy of ostate.
	uint tmp00; //0
	uint tmp01; //0
	uint tmp02; //0
	uint tmp03; //0
	uint tmp04; //0
	uint tmp05; //0
	uint tmp06; //0
	uint tmp07; //0

//	uint tmp08; //0
//	uint tmp09; //0
//	uint tmp10; //0
//	uint tmp11; //0
//	uint tmp12; //0
//	uint tmp13; //0
//	uint tmp14; //0
//	uint tmp15; //0

	// Start from the caller-supplied 8-word midstate.
	uint pad00 = midstate0.x;
	uint pad01 = midstate0.y;
	uint pad02 = midstate0.z;
	uint pad03 = midstate0.w;
	uint pad04 = midstate16.x;
	uint pad05 = midstate16.y;
	uint pad06 = midstate16.z;
	uint pad07 = midstate16.w;
	//uint pad08 = 0;
	//uint pad09 = 0;
	//uint pad10 = 0;
	//uint pad11 = 0;
	//uint pad12 = 0;
	//uint pad13 = 0;
	//uint pad14 = 0;
	//uint pad15 = 0;
	//uint data00 = input[4].x;
	//uint data01 = input[4].y;
	//uint data02 = input[4].z;
	// Three input words that precede gid in every message block built from them below.
	uint data00 = input[16];
	uint data01 = input[17];
	uint data02 = input[18];
//	uint data03 = gid;

/*
	uint data04 = SK02;
	uint data05 = 0; //0
	uint data06 = 0; //0
	uint data07 = 0; //0
	uint data08 = 0; //0
	uint data09 = 0; //0
	uint data10 = 0; //0
	uint data11 = 0; //0
	uint data12 = 0; //0
	uint data13 = 0; //0
	uint data14 = 0; //0
	uint data15 = SK03;
*/

	// SHA256 is defined outside this file. As used here it takes eight state words by pointer,
	// sixteen message words by value, and a trailing flag.
	// NOTE(review): the flag presumably selects "continue from the passed-in state" (one)
	// versus "start from the initial SHA-256 state" (zero) - confirm in sha.h.
	// This call updates pad* in place from the midstate.
	SHA256(&pad00, &pad01, &pad02, &pad03, &pad04, &pad05, &pad06, &pad07, data00, data01, data02, gid,
//		data04, data05, data06, data07, data08, data09, data10, data11, data12, data13, data14, data15, one);
		SK02, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, SK03, one);


	// Build ostate from a block of (pad ^ SK00) followed by eight SK00 words.
	// ostate* is uninitialized at this point and the flag is `zero`.
	tmp00 = pad00^SK00;
	tmp01 = pad01^SK00;
	tmp02 = pad02^SK00;
	tmp03 = pad03^SK00;
	tmp04 = pad04^SK00;
	tmp05 = pad05^SK00;
	tmp06 = pad06^SK00;
	tmp07 = pad07^SK00;
/*
	tmp08 = SK00;
	tmp09 = SK00;
	tmp10 = SK00;
	tmp11 = SK00;
	tmp12 = SK00;
	tmp13 = SK00;
	tmp14 = SK00;
	tmp15 = SK00;
*/
	SHA256(&ostate00, &ostate01, &ostate02, &ostate03, &ostate04, &ostate05, &ostate06, &ostate07,
		tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, SK00, SK00, SK00, SK00, SK00, SK00, SK00, SK00, zero);

	// Same construction with SK01, producing tstate (also uninitialized before this call).
	tmp00 = pad00^SK01;
	tmp01 = pad01^SK01;
	tmp02 = pad02^SK01;
	tmp03 = pad03^SK01;
	tmp04 = pad04^SK01;
	tmp05 = pad05^SK01;
	tmp06 = pad06^SK01;
	tmp07 = pad07^SK01;
/*
	tmp08 = SK01;
	tmp09 = SK01;
	tmp10 = SK01;
	tmp11 = SK01;
	tmp12 = SK01;
	tmp13 = SK01;
	tmp14 = SK01;
	tmp15 = SK01;
*/
	SHA256(&tstate00, &tstate01, &tstate02, &tstate03, &tstate04, &tstate05, &tstate06, &tstate07,
		tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, SK01, SK01, SK01, SK01, SK01, SK01, SK01, SK01, zero);

	//backup tstate
	// The snapshot is taken before input[0..15] is absorbed; it is the state that later
	// absorbs the scrypt_core output instead.
	tstatebak00 = tstate00;
	tstatebak01 = tstate01;
	tstatebak02 = tstate02;
	tstatebak03 = tstate03;
	tstatebak04 = tstate04;
	tstatebak05 = tstate05;
	tstatebak06 = tstate06;
	tstatebak07 = tstate07;
/*
	tmp00 = input[0].x;
	tmp01 = input[0].y;
	tmp02 = input[0].z;
	tmp03 = input[0].w;
	tmp04 = input[1].x;
	tmp05 = input[1].y;
	tmp06 = input[1].z;
	tmp07 = input[1].w;
	tmp08 = input[2].x;
	tmp09 = input[2].y;
	tmp10 = input[2].z;
	tmp11 = input[2].w;
	tmp12 = input[3].x;
	tmp13 = input[3].y;
	tmp14 = input[3].z;
	tmp15 = input[3].w;
*/

/*
    tmp00 = input[0];
    tmp01 = input[1];
    tmp02 = input[2];
    tmp03 = input[3];
    tmp04 = input[4];
    tmp05 = input[5];
    tmp06 = input[6];
    tmp07 = input[7];
    tmp08 = input[8];
    tmp09 = input[9];
    tmp10 = input[10];
    tmp11 = input[11];
    tmp12 = input[12];
    tmp13 = input[13];
    tmp14 = input[14];
    tmp15 = input[15];
*/

	// Absorb the first sixteen input words into tstate.
	SHA256(&tstate00, &tstate01, &tstate02, &tstate03, &tstate04, &tstate05, &tstate06, &tstate07,
		input[0], input[1], input[2], input[3], input[4], input[5], input[6], input[7],
		input[8], input[9], input[10], input[11], input[12], input[13], input[14], input[15], one);

	// Each iteration fills one uint16: the first half of the body writes X[i].s01234567 and the
	// second half writes X[i].s89abcdef. lnum0 is incremented before each half, so the four
	// chains across both iterations see lnum0 = 1, 2, 3, 4.
//#pragma unroll
	for(uint i=0; i<2; i++){
		// Restart from the tstate that has absorbed input[0..15].
		pad00 = tstate00;
		pad01 = tstate01;
		pad02 = tstate02;
		pad03 = tstate03;
		pad04 = tstate04;
		pad05 = tstate05;
		pad06 = tstate06;
		pad07 = tstate07;

//		XA[lnum0] = ostate00;
//		XB[lnum0] = ostate01;
//		XC[lnum0] = ostate02;
//		XD[lnum0] = ostate03;
//		XE[lnum0] = ostate04;
//		XF[lnum0] = ostate05;
//		XG[lnum0] = ostate06;
//		XH[lnum0] = ostate07;

		// Work on a copy of ostate so the original is still available for later chains
		// and for the final hash.
		tmp00 = ostate00;
		tmp01 = ostate01;
		tmp02 = ostate02;
		tmp03 = ostate03;
		tmp04 = ostate04;
		tmp05 = ostate05;
		tmp06 = ostate06;
		tmp07 = ostate07;

		lnum0++;
		// First hash: message is data00..02, gid, the counter and the SK02/SK04 constants.
		SHA256(&pad00, &pad01, &pad02, &pad03, &pad04, &pad05, &pad06, &pad07, data00, data01, data02, gid,
			lnum0, SK02, zero, zero, zero, zero, zero, zero, zero, zero, zero, SK04, one);

//use tmp
		// Second hash: the eight result words of the first hash become the message for the
		// ostate copy held in tmp*.
		SHA256(&tmp00, &tmp01, &tmp02, &tmp03, &tmp04, &tmp05, &tmp06, &tmp07,
			pad00, pad01, pad02, pad03, pad04, pad05, pad06, pad07, SK02, zero, zero, zero, zero, zero, zero, SK05, one);


		//SHA256(&XA[i], &XB[i], &XC[i], &XD[i], &XE[i], &XF[i], &XG[i], &XH[i],
		//	pad00, pad01, pad02, pad03, pad04, pad05, pad06, pad07, SK02, zero, zero, zero, zero, zero, zero, SK05, one);

		// Low eight lanes of X[i].
		X[i].s01234567 = (uint8)(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07);

		// Repeat the same two-hash chain with the next counter value for the high eight lanes.
		pad00 = tstate00;
		pad01 = tstate01;
		pad02 = tstate02;
		pad03 = tstate03;
		pad04 = tstate04;
		pad05 = tstate05;
		pad06 = tstate06;
		pad07 = tstate07;

		tmp00 = ostate00;
		tmp01 = ostate01;
		tmp02 = ostate02;
		tmp03 = ostate03;
		tmp04 = ostate04;
		tmp05 = ostate05;
		tmp06 = ostate06;
		tmp07 = ostate07;

		lnum0++;
		SHA256(&pad00, &pad01, &pad02, &pad03, &pad04, &pad05, &pad06, &pad07, data00, data01, data02, gid,
			lnum0, SK02, zero, zero, zero, zero, zero, zero, zero, zero, zero, SK04, one);

		SHA256(&tmp00, &tmp01, &tmp02, &tmp03, &tmp04, &tmp05, &tmp06, &tmp07,
			pad00, pad01, pad02, pad03, pad04, pad05, pad06, pad07, SK02, zero, zero, zero, zero, zero, zero, SK05, one);

		// High eight lanes of X[i].
		X[i].s89abcdef = (uint8)(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07);

	}


	// shittify, scrypt_core and unshittify are all defined outside this file.
	// NOTE(review): presumably shittify/unshittify convert X to and from the word layout
	// scrypt_core expects, and padcache is its scratch memory - confirm.
	shittify(X);
	scrypt_core(X, padcache);
	unshittify(X);


	//scrypt_core(XA, XB, XC, XD, XE, XF, XG, XH, padcache);

//	SHA256(&tstatebak00, &tstatebak01, &tstatebak02, &tstatebak03, &tstatebak04, &tstatebak05, &tstatebak06, &tstatebak07,
//		XA[0], XB[0], XC[0], XD[0], XE[0], XF[0], XG[0], XH[0], XA[1], XB[1], XC[1], XD[1], XE[1], XF[1], XG[1], XH[1], one);

//	SHA256(&tstatebak00, &tstatebak01, &tstatebak02, &tstatebak03, &tstatebak04, &tstatebak05, &tstatebak06, &tstatebak07,
//		XA[2], XB[2], XC[2], XD[2], XE[2], XF[2], XG[2], XH[2], XA[3], XB[3], XC[3], XD[3], XE[3], XF[3], XG[3], XH[3], one);


	// Absorb the post-scrypt_core contents of X[0] then X[1], sixteen words each, into the
	// tstate snapshot saved earlier.
	for(uint i=0; i<2; i++)
		SHA256(&tstatebak00, &tstatebak01, &tstatebak02, &tstatebak03, &tstatebak04, &tstatebak05, &tstatebak06, &tstatebak07,
			X[i].s0, X[i].s1, X[i].s2, X[i].s3, X[i].s4, X[i].s5, X[i].s6, X[i].s7, X[i].s8, X[i].s9, X[i].sa, X[i].sb, X[i].sc, X[i].sd, X[i].se, X[i].sf, one);

//	SHA256(&tstatebak00, &tstatebak01, &tstatebak02, &tstatebak03, &tstatebak04, &tstatebak05, &tstatebak06, &tstatebak07,
//		X[1].s0, X[1].s1, X[1].s2, X[1].s3, X[1].s4, X[1].s5, X[1].s6, X[1].s7, X[1].s8, X[1].s9, X[1].sa, X[1].sb, X[1].sc, X[1].sd, X[1].se, X[1].sf, one);



	// tstate becomes the working copy for the rounds below; tstatebak keeps the value that
	// is added back in afterwards.
	tstate00 = tstatebak00;
	tstate01 = tstatebak01;
	tstate02 = tstatebak02;
	tstate03 = tstatebak03;
	tstate04 = tstatebak04;
	tstate05 = tstatebak05;
	tstate06 = tstatebak06;
	tstate07 = tstatebak07;

	// Short aliases for the eight working words, scoped to the round loop only.
#define A tstate00
#define B tstate01
#define C tstate02
#define D tstate03
#define E tstate04
#define F tstate05
#define G tstate06
#define H tstate07
	// 8 iterations x 8 RND invocations = 64 steps. The argument list is rotated by one
	// position on each successive RND call, and each call takes its last argument from a
	// different fixedW* table at index i.
	// NOTE(review): RND and fixedW* are defined elsewhere; this looks like a SHA-256
	// compression over a constant message block with precomputed values - confirm.
	for(uint i=0; i<8; i++){
		RND(A,B,C,D,E,F,G,H, fixedWa[i]);
		RND(H,A,B,C,D,E,F,G, fixedWb[i]);
		RND(G,H,A,B,C,D,E,F, fixedWc[i]);
		RND(F,G,H,A,B,C,D,E, fixedWd[i]);
		RND(E,F,G,H,A,B,C,D, fixedWe[i]);
		RND(D,E,F,G,H,A,B,C, fixedWf[i]);
		RND(C,D,E,F,G,H,A,B, fixedWg[i]);
		RND(B,C,D,E,F,G,H,A, fixedWh[i]);
	}
#undef A
#undef B
#undef C
#undef D
#undef E
#undef F
#undef G
#undef H

	// Add the post-round working words back into the pre-round state.
	tstatebak00 += tstate00;
	tstatebak01 += tstate01;
	tstatebak02 += tstate02;
	tstatebak03 += tstate03;
	tstatebak04 += tstate04;
	tstatebak05 += tstate05;
	tstatebak06 += tstate06;
	tstatebak07 += tstate07;

	// Final hash: the eight tstatebak words are the message absorbed into ostate.
	SHA256(&ostate00, &ostate01, &ostate02, &ostate03, &ostate04, &ostate05, &ostate06, &ostate07,
		tstatebak00, tstatebak01, tstatebak02, tstatebak03, tstatebak04, tstatebak05, tstatebak06, tstatebak07,
		SK02, zero, zero, zero, zero, zero, zero, SK05, one);

	// Only the last output word is compared against target; `one` is reused as the result flag.
	// EndianSwapa and SETFOUND are defined outside this file.
	// NOTE(review): SETFOUND presumably records gid in `output` - confirm.
	one = (EndianSwapa(ostate07) <= target);
	if(one)
		SETFOUND(gid);
}
