#ifndef SHA
#define SHA 1

/*
 * Fully unrolled 64-step compression of one 16-word message block into an
 * eight-word state, following the SHA-256 round/schedule structure.
 *
 * state0, state1 : the eight state words, split into two uint4 halves
 *                  (state0 = words 0..3, state1 = words 4..7). Read on entry
 *                  and overwritten with the result on exit.
 * block0..block3 : the sixteen message words, four per uint4, passed by value.
 * notfresh       : when true, the incoming *state0 / *state1 are used as the
 *                  chaining value. When false, the incoming state is ignored:
 *                  every working variable and every lane of tmp0/tmp1 is
 *                  overwritten from the constant table before use.
 *
 * K, RND, Tr1, Tr2, Ch, Maj, Wr1 and Wr2 are not defined in this block and
 * must already be in scope where this file is included.
 * NOTE(review): the comments below assume RND performs one SHA-256 round and
 * Wr1/Wr2 are the message-schedule sigma functions -- confirm against their
 * definitions.
 */
void SHA256(uint4 *restrict state0,uint4 *restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3, bool notfresh){

	
/* A..H name the eight working variables; they alias the scalar locals
   S0a..S0d (from state0) and S1a..S1d (from state1) declared below. */
#define A S0a
#define B S0b
#define C S0c
#define D S0d
#define E S1a
#define F S1b
#define G S1c
#define H S1d

/* W*, X*, Y*, Z* name the 16 message-schedule words: W = block0, X = block1,
   Y = block2, Z = block3; the suffix x/y/z/w selects the lane. */
#define Wx a1
#define Wy a2
#define Wz a3
#define Ww a4

#define Xx b1
#define Xy b2
#define Xz b3 
#define Xw b4

#define Yx c1
#define Yy c2
#define Yz c3 
#define Yw c4

#define Zx d1
#define Zy d2
#define Zz d3 
#define Zw d4

	/* Keep a copy of the incoming state; it is the value added back to the
	   working variables at the end (unless replaced in the !notfresh path). */
	uint4 tmp0 = *state0;
	uint4 tmp1 = *state1;

	/* Unpack the vector arguments lane by lane into scalar locals. */
	uint a1 = block0.x;
	uint b1 = block1.x;
	uint c1 = block2.x;
	uint d1 = block3.x;
	uint S0a = tmp0.x;
	uint S1a = tmp1.x;

	uint a2 = block0.y;
	uint b2 = block1.y;
	uint c2 = block2.y;
	uint d2 = block3.y;
	uint S0b = tmp0.y;
	uint S1b = tmp1.y;

	uint a3 = block0.z;
	uint b3 = block1.z;
	uint c3 = block2.z;
	uint d3 = block3.z;
	uint S0c = tmp0.z;
	uint S1c = tmp1.z;

	uint a4 = block0.w;
	uint b4 = block1.w;
	uint c4 = block2.w;
	uint d4 = block3.w;
	uint S0d = tmp0.w;
	uint S1d = tmp1.w;

	/* Scratch scalars that hold round constants loaded from K[] in batches. */
	uint K0, K1, K2, K3, K4, K5, K6, K7;

	if(notfresh){
		/* Chained call: first four steps via RND on the incoming state with
		   K[0..3] and the block0 words. Each successive RND call passes the
		   eight variable names rotated by one position. */
		K0 = K[0];
		K1 = K[1];
		K2 = K[2];
		K3 = K[3];
		RND(A,B,C,D,E,F,G,H, K0+Wx);
		RND(H,A,B,C,D,E,F,G, K1+Wy);
		RND(G,H,A,B,C,D,E,F, K2+Wz);
		RND(F,G,H,A,B,C,D,E, K3+Ww);
	}else{
		/* Fresh call: the working variables are computed directly from table
		   entries K[63..75] and the block0 words, with no RND calls.
		   NOTE(review): presumably K[63..75] are precomputed terms that fold
		   the standard SHA-256 initial state into the first four rounds --
		   verify against the K table definition. */
		K0 = K[63];
		K1 = K[64];
		K2 = K[65];
		K3 = K[66];
		K4 = K[67];
		K5 = K[68];
		K6 = K[69];
		K7 = K[70];

		/* D and H both depend on the first message word only. */
		D= K0 +Wx;
		H= K1 +Wx;
		C= K2 +Tr1(D)+Ch(D, K3, K4)+Wy;
		G= K5 +C+Tr2(H)+Ch(H, K6 ,K7);

		/* K[66] and K[67] also become lanes x and y of the value added to
		   E and F at the end of the function. */
		tmp1.x = K3;
		tmp1.y = K4;

		K4 = K[71];
		K5 = K[72];
		K6 = K[73];
		K7 = K[74];
		K0 = K[75];

		/* K3 still holds K[66] here; K6 now holds K[73]. */
		B= K4 +Tr1(C)+Ch(C,D,K3)+Wz;
		F= K5 +B+Tr2(G)+Maj(G,H, K6);
		A= K7 +Tr1(B)+Ch(B,C,D)+Ww;
		E= K0 +A+Tr2(F)+Maj(F,G,H);

		/* Replace the remaining lanes of the saved state with table entries,
		   so the final addition uses K[73], K[77..79], K[66], K[67], K[80],
		   K[81] instead of the caller-supplied state.
		   NOTE(review): presumably these are the SHA-256 initial hash
		   values -- confirm against the K table. */
		tmp0.x = K6;
		tmp0.y = K[77];
		tmp0.z = K[78];
		tmp0.w = K[79];
		tmp1.z = K[80];
		tmp1.w = K[81];
	}

	/* Steps 5..16: the remaining twelve unmodified message words (block1,
	   block2, block3) with K[4..14]; the last of them uses K[76]. */
	K0 = K[4];
	K1 = K[5];
	K2 = K[6];
	K3 = K[7];
	RND(E,F,G,H,A,B,C,D,K0+Xx);
	RND(D,E,F,G,H,A,B,C,K1+Xy);
	RND(C,D,E,F,G,H,A,B,K2+Xz);
	RND(B,C,D,E,F,G,H,A,K3+Xw);
	K4 = K[8];
	K5 = K[9];
	K6 = K[10];
	K7 = K[11];
	RND(A,B,C,D,E,F,G,H,K4+Yx);
	RND(H,A,B,C,D,E,F,G,K5+Yy);
	RND(G,H,A,B,C,D,E,F,K6+Yz);
	RND(F,G,H,A,B,C,D,E,K7+Yw);
	/* The 16th step (message word Zw) takes its constant from K[76], and the
	   48 steps that follow use K[15]..K[62]. NOTE(review): this implies the
	   K table is not in standard SHA-256 order from index 15 on (the 16th
	   constant presumably lives at K[76]) -- verify against the table. */
	K7 = K[76];
	K0 = K[12];
	K1 = K[13];
	K2 = K[14];
	RND(E,F,G,H,A,B,C,D,K0+Zx);
	RND(D,E,F,G,H,A,B,C,K1+Zy);
	RND(C,D,E,F,G,H,A,B,K2+Zz);
	RND(B,C,D,E,F,G,H,A,K7+Zw);
	/* Remaining 48 steps. Before each RND the oldest of the sixteen schedule
	   words is updated in place:
	       w[t] = w[t-16] + Wr1(w[t-2]) + w[t-7] + Wr2(w[t-15])
	   so the sixteen scalars act as a sliding window over the schedule.
	   The same 16-statement pattern (W, X, Y, Z groups) repeats three times. */
	K3 = K[15];
	K4 = K[16];
	K5 = K[17];
	K6 = K[18];
	Wx += Wr1(Zz) + Yy + Wr2(Wy);
	RND(A,B,C,D,E,F,G,H, Wx+ K3);
	Wy += Wr1(Zw) + Yz + Wr2(Wz);
	RND(H,A,B,C,D,E,F,G, Wy+ K4);
	Wz += Wr1(Wx) + Yw + Wr2(Ww);
	RND(G,H,A,B,C,D,E,F, Wz+ K5);
	Ww += Wr1(Wy) + Zx + Wr2(Xx);
	RND(F,G,H,A,B,C,D,E, Ww+ K6);
	K0 = K[19];
	K1 = K[20];
	K2 = K[21];
	K3 = K[22];
	Xx += Wr1(Wz) + Zy + Wr2(Xy);
	RND(E,F,G,H,A,B,C,D, Xx+ K0);
	Xy += Wr1(Ww) + Zz + Wr2(Xz);
	RND(D,E,F,G,H,A,B,C, Xy+ K1);
	Xz += Wr1(Xx) + Zw + Wr2(Xw);
	RND(C,D,E,F,G,H,A,B, Xz+ K2);
	Xw += Wr1(Xy) + Wx + Wr2(Yx);
	RND(B,C,D,E,F,G,H,A, Xw+ K3);
	K4 = K[23];
	K5 = K[24];
	K6 = K[25];
	K7 = K[26];
	Yx += Wr1(Xz) + Wy + Wr2(Yy);
	RND(A,B,C,D,E,F,G,H, Yx+ K4);
	Yy += Wr1(Xw) + Wz + Wr2(Yz);
	RND(H,A,B,C,D,E,F,G, Yy+ K5);
	Yz += Wr1(Yx) + Ww + Wr2(Yw);
	RND(G,H,A,B,C,D,E,F, Yz+ K6);
	Yw += Wr1(Yy) + Xx + Wr2(Zx);
	RND(F,G,H,A,B,C,D,E, Yw+ K7);
	K0 = K[27];
	K1 = K[28];
	K2 = K[29];
	K3 = K[30];
	Zx += Wr1(Yz) + Xy + Wr2(Zy);
	RND(E,F,G,H,A,B,C,D, Zx+ K0);
	Zy += Wr1(Yw) + Xz + Wr2(Zz);
	RND(D,E,F,G,H,A,B,C, Zy+ K1);
	Zz += Wr1(Zx) + Xw + Wr2(Zw);
	RND(C,D,E,F,G,H,A,B, Zz+ K2);
	Zw += Wr1(Zy) + Yx + Wr2(Wx);
	RND(B,C,D,E,F,G,H,A, Zw+ K3);
	/* Second pass over the 16-word window, constants K[31..46]. */
	K4 = K[31];
	K5 = K[32];
	K6 = K[33];
	K7 = K[34];
	Wx += Wr1(Zz) + Yy + Wr2(Wy);
	RND(A,B,C,D,E,F,G,H, Wx+ K4);
	Wy += Wr1(Zw) + Yz + Wr2(Wz);
	RND(H,A,B,C,D,E,F,G, Wy+ K5);
	Wz += Wr1(Wx) + Yw + Wr2(Ww);
	RND(G,H,A,B,C,D,E,F, Wz+ K6);
	Ww += Wr1(Wy) + Zx + Wr2(Xx);
	RND(F,G,H,A,B,C,D,E, Ww+ K7);
	K0 = K[35];
	K1 = K[36];
	K2 = K[37];
	K3 = K[38];
	Xx += Wr1(Wz) + Zy + Wr2(Xy);
	RND(E,F,G,H,A,B,C,D, Xx+ K0);
	Xy += Wr1(Ww) + Zz + Wr2(Xz);
	RND(D,E,F,G,H,A,B,C, Xy+ K1);
	Xz += Wr1(Xx) + Zw + Wr2(Xw);
	RND(C,D,E,F,G,H,A,B, Xz+ K2);
	Xw += Wr1(Xy) + Wx + Wr2(Yx);
	RND(B,C,D,E,F,G,H,A, Xw+ K3);
	K4 = K[39];
	K5 = K[40];
	K6 = K[41];
	K7 = K[42];
	Yx += Wr1(Xz) + Wy + Wr2(Yy);
	RND(A,B,C,D,E,F,G,H, Yx+ K4);
	Yy += Wr1(Xw) + Wz + Wr2(Yz);
	RND(H,A,B,C,D,E,F,G, Yy+ K5);
	Yz += Wr1(Yx) + Ww + Wr2(Yw);
	RND(G,H,A,B,C,D,E,F, Yz+ K6);
	Yw += Wr1(Yy) + Xx + Wr2(Zx);
	RND(F,G,H,A,B,C,D,E, Yw+ K7);
	K0 = K[43];
	K1 = K[44];
	K2 = K[45];
	K3 = K[46];
	Zx += Wr1(Yz) + Xy + Wr2(Zy);
	RND(E,F,G,H,A,B,C,D, Zx+ K0);
	Zy += Wr1(Yw) + Xz + Wr2(Zz);
	RND(D,E,F,G,H,A,B,C, Zy+ K1);
	Zz += Wr1(Zx) + Xw + Wr2(Zw);
	RND(C,D,E,F,G,H,A,B, Zz+ K2);
	Zw += Wr1(Zy) + Yx + Wr2(Wx);
	RND(B,C,D,E,F,G,H,A, Zw+ K3);
	/* Third and final pass over the 16-word window, constants K[47..62]. */
	K4 = K[47];
	K5 = K[48];
	K6 = K[49];
	K7 = K[50];
	Wx += Wr1(Zz) + Yy + Wr2(Wy);
	RND(A,B,C,D,E,F,G,H, Wx+ K4);
	Wy += Wr1(Zw) + Yz + Wr2(Wz);
	RND(H,A,B,C,D,E,F,G, Wy+ K5);
	Wz += Wr1(Wx) + Yw + Wr2(Ww);
	RND(G,H,A,B,C,D,E,F, Wz+ K6);
	Ww += Wr1(Wy) + Zx + Wr2(Xx);
	RND(F,G,H,A,B,C,D,E, Ww+ K7);
	K0 = K[51];
	K1 = K[52];
	K2 = K[53];
	K3 = K[54];
	Xx += Wr1(Wz) + Zy + Wr2(Xy);
	RND(E,F,G,H,A,B,C,D, Xx+ K0);
	Xy += Wr1(Ww) + Zz + Wr2(Xz);
	RND(D,E,F,G,H,A,B,C, Xy+ K1);
	Xz += Wr1(Xx) + Zw + Wr2(Xw);
	RND(C,D,E,F,G,H,A,B, Xz+ K2);
	Xw += Wr1(Xy) + Wx + Wr2(Yx);
	RND(B,C,D,E,F,G,H,A, Xw+ K3);
	K4 = K[55];
	K5 = K[56];
	K6 = K[57];
	K7 = K[58];
	Yx += Wr1(Xz) + Wy + Wr2(Yy);
	RND(A,B,C,D,E,F,G,H, Yx+ K4);
	Yy += Wr1(Xw) + Wz + Wr2(Yz);
	RND(H,A,B,C,D,E,F,G, Yy+ K5);
	Yz += Wr1(Yx) + Ww + Wr2(Yw);
	RND(G,H,A,B,C,D,E,F, Yz+ K6);
	Yw += Wr1(Yy) + Xx + Wr2(Zx);
	RND(F,G,H,A,B,C,D,E, Yw+ K7);
	K4 = K[59];
	K5 = K[60];
	K6 = K[61];
	K7 = K[62];
	Zx += Wr1(Yz) + Xy + Wr2(Zy);
	RND(E,F,G,H,A,B,C,D, Zx+ K4);
	Zy += Wr1(Yw) + Xz + Wr2(Zz);
	RND(D,E,F,G,H,A,B,C, Zy+ K5);
	Zz += Wr1(Zx) + Xw + Wr2(Zw);
	RND(C,D,E,F,G,H,A,B, Zz+ K6);
	Zw += Wr1(Zy) + Yx + Wr2(Wx);
	RND(B,C,D,E,F,G,H,A, Zw+ K7);
	
/* Drop the function-local aliases so they do not leak into code that follows
   this function in the translation unit. */
#undef A
#undef B
#undef C
#undef D
#undef E
#undef F
#undef G
#undef H

#undef Wx
#undef Wy
#undef Wz
#undef Ww

#undef Xx
#undef Xy
#undef Xz
#undef Xw

#undef Yx
#undef Yy
#undef Yz
#undef Yw

#undef Zx
#undef Zy
#undef Zz
#undef Zw

	/* Add the final working variables to the saved state (incoming state when
	   notfresh, table constants otherwise) and write the result back. */
	tmp0 += (uint4)(S0a, S0b, S0c, S0d);
	tmp1 += (uint4)(S1a, S1b, S1c, S1d);

	*state0 = tmp0;
	*state1 = tmp1;
}
#endif