#ifndef SHA_256
#define SHA_256 1
#include "globals.h"

/*
 * SHA256 - fold one 16-word message block into a running 8-word state.
 *
 * digest: in/out. Its eight components seed the working variables A..H;
 *         on return the final working variables have been added to it
 *         component-wise.
 * block0: only read here; its components .s0-.s7 become message words 0..7.
 * block1: only read here; its components .s0-.s7 become message words 8..15.
 *
 * The body issues 4 passes x 16 RND invocations = 64 in total, consuming
 * Kc[0..3] with components .s0-.sf (16 values per entry).
 *
 * RND, Wr1, Wr2 and Kc are not defined in this file.
 * NOTE(review): the structure matches the SHA-256 compression function, with
 * RND as the round step, Wr1/Wr2 as the message-schedule sigma functions and
 * Kc as the round-constant table - confirm against their definitions.
 */
void SHA256(uint8 *digest, uint8 *block0, uint8 *block1){

	/* Working variables, loaded from the incoming state. */
	uint A = (*digest).s0;
	uint B = (*digest).s1;
	uint C = (*digest).s2;
	uint D = (*digest).s3;
	uint E = (*digest).s4;
	uint F = (*digest).s5;
	uint G = (*digest).s6;
	uint H = (*digest).s7;

	/* Message words 0..7 come from the first half-block. */
	uint B00 = (*block0).s0;
	uint B01 = (*block0).s1;
	uint B02 = (*block0).s2;
	uint B03 = (*block0).s3;
	uint B04 = (*block0).s4;
	uint B05 = (*block0).s5;
	uint B06 = (*block0).s6;
	uint B07 = (*block0).s7;

	/* Message words 8..15 come from the second half-block. */
	uint B08 = (*block1).s0;
	uint B09 = (*block1).s1;
	uint B10 = (*block1).s2;
	uint B11 = (*block1).s3;
	uint B12 = (*block1).s4;
	uint B13 = (*block1).s5;
	uint B14 = (*block1).s6;
	uint B15 = (*block1).s7;

	/* Each pass runs 16 RND steps, then rewrites the 16 words for the next pass. */
	for(uint i=0; i<4; i++){

		/*
		 * The A..H argument list is rotated by one position per call, so
		 * after eight calls the names are back in their starting order.
		 * Presumably this lets RND avoid moving values between variables -
		 * confirm against the macro.
		 */
		RND(A,B,C,D,E,F,G,H, B00 + Kc[i].s0);
		RND(H,A,B,C,D,E,F,G, B01 + Kc[i].s1);
		RND(G,H,A,B,C,D,E,F, B02 + Kc[i].s2);
		RND(F,G,H,A,B,C,D,E, B03 + Kc[i].s3);
		RND(E,F,G,H,A,B,C,D, B04 + Kc[i].s4);
		RND(D,E,F,G,H,A,B,C, B05 + Kc[i].s5);
		RND(C,D,E,F,G,H,A,B, B06 + Kc[i].s6);
		RND(B,C,D,E,F,G,H,A, B07 + Kc[i].s7);

		RND(A,B,C,D,E,F,G,H, B08 + Kc[i].s8);
		RND(H,A,B,C,D,E,F,G, B09 + Kc[i].s9);
		RND(G,H,A,B,C,D,E,F, B10 + Kc[i].sa);
		RND(F,G,H,A,B,C,D,E, B11 + Kc[i].sb);
		RND(E,F,G,H,A,B,C,D, B12 + Kc[i].sc);
		RND(D,E,F,G,H,A,B,C, B13 + Kc[i].sd);
		RND(C,D,E,F,G,H,A,B, B14 + Kc[i].se);
		RND(B,C,D,E,F,G,H,A, B15 + Kc[i].sf);

		/* Last pass: no more rounds follow, so the word update below is skipped. */
		if(i==3)
    		break;

		/*
		 * In-place update of the 16 words for the next pass. Word n is
		 * combined with words n+14, n+9 and n+1 (indices mod 16).
		 * Statement order matters: from B02 onward the Wr1 operand, and
		 * from B07 onward the plain addend, are words already rewritten
		 * earlier in this same sequence.
		 */
		B00 += Wr1(B14) + B09 + Wr2(B01);
		B01 += Wr1(B15) + B10 + Wr2(B02);
		B02 += Wr1(B00) + B11 + Wr2(B03);
		B03 += Wr1(B01) + B12 + Wr2(B04);
		B04 += Wr1(B02) + B13 + Wr2(B05);
		B05 += Wr1(B03) + B14 + Wr2(B06);
		B06 += Wr1(B04) + B15 + Wr2(B07);
		B07 += Wr1(B05) + B00 + Wr2(B08);

		B08 += Wr1(B06) + B01 + Wr2(B09);
		B09 += Wr1(B07) + B02 + Wr2(B10);
		B10 += Wr1(B08) + B03 + Wr2(B11);
		B11 += Wr1(B09) + B04 + Wr2(B12);
		B12 += Wr1(B10) + B05 + Wr2(B13);
		B13 += Wr1(B11) + B06 + Wr2(B14);
		B14 += Wr1(B12) + B07 + Wr2(B15);
		B15 += Wr1(B13) + B08 + Wr2(B00);
	}

	/* Add the final working variables back into the caller's state. */
	*digest += (uint8)(A, B, C, D, E, F, G, H);
}


/*
 * Scrypt_Key - produce four 8-word SHA256 results and store them as two
 * uint16 entries at pad0[pady] and pad0[pady+1].
 *
 * pad0:   global output buffer; entries pady and pady+1 are overwritten.
 * pady:   index of the first output entry in pad0.
 * input:  global, read-only; fields a, b, c, d and e are read.
 * gid:    placed in component .s3 of the per-iteration first half-block.
 * tstate: passed by value; first advanced over (input->a, input->b), then
 *         used as the starting state of the first hash in every iteration.
 * ostate: passed by value; starting state of the second hash in every
 *         iteration.
 *
 * NOTE(review): the two chained hashes per iteration and the incrementing
 * .s4 word look like HMAC-based PBKDF2 block generation with a block
 * counter - confirm against the caller and the SK0x constants.
 */
void Scrypt_Key(__global uint16 *pad0, uint pady, const __global input_t *input, uint gid, uint8 tstate, uint8 ostate){

	uint8 head = input->a;
	uint8 tail = input->b;

	/* Half-blocks fed to SHA256 inside the loop; only block0.s4 changes per iteration. */
	uint8 block0 = {input->c, input->d, input->e, gid, ZERO, SK00, ZERO, ZERO};
	uint8 block1a = {0,0,0,0,0,0,0,SK04};
	uint8 block1b = {SK00,0,0,0,0,0,0,SK05};

	uint16 key[2];

	/* Advance the local copy of tstate over the first 16 input words. */
	SHA256(&tstate, &head, &tail);

	for (uint i = 0; i < 4; i++) {

		/* Bump the .s4 word before each first hash. */
		block0.s4++;

		/* First hash: start from tstate, absorb block0 and block1a. */
		uint8 first = tstate;
		SHA256(&first, &block0, &block1a);

		/* Second hash: start from ostate, absorb the first result and block1b. */
		uint8 second = ostate;
		SHA256(&second, &first, &block1b);

		/* Even i fills the low half, odd i the high half, of key[i / 2]. */
		if (i & ONE) {
			key[(i>>1)].hi = second;
		} else {
			key[(i>>1)].lo = second;
		}
	}

	pad0[pady] = key[0];
	pad0[pady+1] = key[1];
}

/*
void Tstate_SHAd(uint8 *tstate, __global uint16 *pad0, uint pady){

uint8 tmpa = pad0[pady].lo;
uint8 tmpb = pad0[pady].hi;
SHA256(tstate, &tmpa, &tmpb);
tmpa = pad0[pady+1].lo;
tmpb = pad0[pady+1].hi;
SHA256(tstate, &tmpa, &tmpb);

//uint16 tmp[2] = {pad0[pady], pad0[pady+1]};
//uint8 tmpa, tmpb;
//uint8 tmp[4] = {pad0[pady].lo, pad0[pady].hi, pad0[pady+1].lo, pad0[pady+1].hi};

uint8 tmpa = pad0[pady].lo;
uint8 tmpb = pad0[pady++].hi;

for(uint i=0; i<2; i++){
	//tmpa = pad0[pady+i].lo;
	//tmpb = pad0[pady+i].hi;
	SHA256(tstate, &tmpa, &tmpb);
	if(i==1)
		return;
	tmpa = pad0[pady].lo;
	tmpb = pad0[pady].hi;
}

}
*/


/*
void Int_SHA(uint4 midstate0, uint4 midstate16, const __global input_t *input, uint gid, uint8 *tstate, uint8 *ostate){
    uint8 tmpa = {input->c, input->d, input->e, gid, SK00, ZERO, ZERO, ZERO};
    uint8 tmpb = {ZERO, ZERO, ZERO, ZERO, ZERO, ZERO, ZERO, SK01};
    uint8 tmpc = {midstate0.s0, midstate0.s1, midstate0.s2, midstate0.s3, midstate16.s0, midstate16.s1, midstate16.s2, midstate16.s3};
*/

/*
 * Int_SHA - advance ostate and then tstate by one SHA256 call each.
 *
 * midsha: passed by value; XORed with SK02 (for ostate) and SK03 (for
 *         tstate) to form the first half-block of each call.
 * tstate: in/out state, updated by the second call.
 * ostate: in/out state, updated by the first call.
 *
 * The second half-block of each call is the same SK constant that was
 * XORed into midsha.
 * NOTE(review): this looks like HMAC inner/outer key-pad setup - confirm
 * against the values of SK02 and SK03.
 */
void Int_SHA(uint8 midsha, uint8 *tstate, uint8 *ostate){
	uint8 keyed;
	uint8 filler;

	/* First call: ostate over (midsha ^ SK02, SK02). */
	keyed = midsha^SK02;
	filler = SK02;
	SHA256(ostate, &keyed, &filler);

	/* Second call: tstate over (midsha ^ SK03, SK03). */
	keyed = midsha^SK03;
	filler = SK03;
	SHA256(tstate, &keyed, &filler);
}

/*
 * Final_SHAd - finish with two SHA256 calls: one on tstate over a fixed
 * block, then one on ostate over the updated tstate.
 *
 * tstate: in/out; updated by the first call, then read as the first
 *         half-block of the second call.
 * ostate: in/out; updated by the second call and holds the final result.
 *
 * NOTE(review): the fixed words (0x00000001, 0x80000000, ... 0x00000620)
 * look like a block counter of 1 followed by SHA-256 padding and a bit
 * length - confirm against the caller and the SK00 / SK05 values.
 */
void Final_SHAd(uint8 *tstate, uint8 *ostate){
	/* First call: tstate absorbs a constant 16-word block. */
	uint8 fixed_lo = {0x00000001U, 0x80000000U, 0, 0, 0, 0, 0, 0};
	uint8 fixed_hi = {0, 0, 0, 0, 0, 0, 0, 0x00000620U};
	SHA256(tstate, &fixed_lo, &fixed_hi);

	/* Second call: ostate absorbs the updated tstate followed by a constant half-block. */
	uint8 outer_hi = {SK00, 0, 0, 0, 0, 0, 0, SK05};
	SHA256(ostate, tstate, &outer_hi);
}
#endif