#ifndef SALSA
#define SALSA


/*
 * salsa - mix the 32-element block B in place.
 *
 *   1. B[0..15] ^= B[16..31]; the result is copied into x00..x15.
 *   2. Four iterations of a column step followed by a row step are run on
 *      x00..x15 (rotation amounts 7, 9, 13, 18 - the Salsa20 double-round
 *      pattern, i.e. 8 rounds in total).
 *   3. B[0..15] += x00..x15.
 *
 * When compiled with LOOKUP_GAP == 2 the function takes an extra flag `db`.
 * If db is true a second pass is inserted between steps 2 and 3:
 *   B[16..31] += x00..x15, then B[0..15] ^= B[16..31], the result is reloaded
 *   into x00..x15 and another four column/row iterations are run.
 * In that case B[16..31] is modified as well; otherwise only B[0..15] is
 * written.
 *
 * B must point to at least 32 elements of type vo (declared elsewhere).
 */
#if (LOOKUP_GAP == 2)
void salsa(vo *B, bool db){
#else
void salsa(vo *B){
#endif

	vo x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15;

	/* Fold the upper half into the lower half and keep a working copy. */
	x00 = (B[ 0] ^= B[ 16]);
	x01 = (B[ 1] ^= B[ 17]);
	x02 = (B[ 2] ^= B[ 18]);
	x03 = (B[ 3] ^= B[ 19]);
	x04 = (B[ 4] ^= B[ 20]);
	x05 = (B[ 5] ^= B[ 21]);
	x06 = (B[ 6] ^= B[ 22]);
	x07 = (B[ 7] ^= B[ 23]);
	x08 = (B[ 8] ^= B[ 24]);
	x09 = (B[ 9] ^= B[ 25]);
	x10 = (B[10] ^= B[26]);
	x11 = (B[11] ^= B[27]);
	x12 = (B[12] ^= B[28]);
	x13 = (B[13] ^= B[29]);
	x14 = (B[14] ^= B[30]);
	x15 = (B[15] ^= B[31]);

/* Rotate left by b bits; the (32 - b) term assumes 32-bit wide lanes. */
#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b))))

	for(uint i = 0; i < 4; i++){
		/* Operate on columns. */
		x04 ^= R(x00+x12, 7);
		x09 ^= R(x05+x01, 7);
		x14 ^= R(x10+x06, 7);
		x03 ^= R(x15+x11, 7);
		x08 ^= R(x04+x00, 9);
		x13 ^= R(x09+x05, 9);
		x02 ^= R(x14+x10, 9);
		x07 ^= R(x03+x15, 9);
		x12 ^= R(x08+x04,13);
		x01 ^= R(x13+x09,13);
		x06 ^= R(x02+x14,13);
		x11 ^= R(x07+x03,13);
		x00 ^= R(x12+x08,18);
		x05 ^= R(x01+x13,18);
		x10 ^= R(x06+x02,18);
		x15 ^= R(x11+x07,18);
		/* Operate on rows. */
		x01 ^= R(x00+x03, 7);
		x06 ^= R(x05+x04, 7);
		x11 ^= R(x10+x09, 7);
		x12 ^= R(x15+x14, 7);
		x02 ^= R(x01+x00, 9);
		x07 ^= R(x06+x05, 9);
		x08 ^= R(x11+x10, 9);
		x13 ^= R(x12+x15, 9);
		x03 ^= R(x02+x01,13);
		x04 ^= R(x07+x06,13);
		x09 ^= R(x08+x11,13);
		x14 ^= R(x13+x12,13);
		x00 ^= R(x03+x02,18);
		x05 ^= R(x04+x07,18);
		x10 ^= R(x09+x08,18);
		x15 ^= R(x14+x13,18);
	}

#if (LOOKUP_GAP == 2)
	if(db){
		/*
		 * Second pass: accumulate the first pass into the upper half,
		 * fold the upper half into the lower half again and reload the
		 * working copy from it.
		 */
		x00 = (B[ 0] ^= (B[ 16]+=x00));
		x01 = (B[ 1] ^= (B[ 17]+=x01));
		x02 = (B[ 2] ^= (B[ 18]+=x02));
		x03 = (B[ 3] ^= (B[ 19]+=x03));
		x04 = (B[ 4] ^= (B[ 20]+=x04));
		x05 = (B[ 5] ^= (B[ 21]+=x05));
		x06 = (B[ 6] ^= (B[ 22]+=x06));
		x07 = (B[ 7] ^= (B[ 23]+=x07));
		x08 = (B[ 8] ^= (B[ 24]+=x08));
		x09 = (B[ 9] ^= (B[ 25]+=x09));
		x10 = (B[10] ^= (B[26]+=x10));
		x11 = (B[11] ^= (B[27]+=x11));
		x12 = (B[12] ^= (B[28]+=x12));
		x13 = (B[13] ^= (B[29]+=x13));
		x14 = (B[14] ^= (B[30]+=x14));
		x15 = (B[15] ^= (B[31]+=x15));

		for(uint i = 0; i < 4; i++){
			/* Operate on columns. */
			x04 ^= R(x00+x12, 7);
			x09 ^= R(x05+x01, 7);
			x14 ^= R(x10+x06, 7);
			x03 ^= R(x15+x11, 7);
			x08 ^= R(x04+x00, 9);
			x13 ^= R(x09+x05, 9);
			x02 ^= R(x14+x10, 9);
			x07 ^= R(x03+x15, 9);
			x12 ^= R(x08+x04,13);
			x01 ^= R(x13+x09,13);
			x06 ^= R(x02+x14,13);
			x11 ^= R(x07+x03,13);
			x00 ^= R(x12+x08,18);
			x05 ^= R(x01+x13,18);
			x10 ^= R(x06+x02,18);
			x15 ^= R(x11+x07,18);
			/* Operate on rows. */
			x01 ^= R(x00+x03, 7);
			x06 ^= R(x05+x04, 7);
			x11 ^= R(x10+x09, 7);
			x12 ^= R(x15+x14, 7);
			x02 ^= R(x01+x00, 9);
			x07 ^= R(x06+x05, 9);
			x08 ^= R(x11+x10, 9);
			x13 ^= R(x12+x15, 9);
			x03 ^= R(x02+x01,13);
			x04 ^= R(x07+x06,13);
			x09 ^= R(x08+x11,13);
			x14 ^= R(x13+x12,13);
			x00 ^= R(x03+x02,18);
			x05 ^= R(x04+x07,18);
			x10 ^= R(x09+x08,18);
			x15 ^= R(x14+x13,18);
		}
	}
#endif

/* Drop the helper in every configuration so it does not leak out of salsa(). */
#undef R

	/* Feed the mixed working copy back into the lower half of the block. */
	B[ 0] += x00;
	B[ 1] += x01;
	B[ 2] += x02;
	B[ 3] += x03;
	B[ 4] += x04;
	B[ 5] += x05;
	B[ 6] += x06;
	B[ 7] += x07;
	B[ 8] += x08;
	B[ 9] += x09;
	B[10] += x10;
	B[11] += x11;
	B[12] += x12;
	B[13] += x13;
	B[14] += x14;
	B[15] += x15;
}


// Type-specific variant of salsa: the element type of B is chosen at compile time from CLSIZE.
/*
 * salsaT - mix the 32-element block B in place.
 *
 * The element type is selected by CLSIZE: uint16 for 64, uint8 for 32 and
 * uint4 otherwise. B[0..15] is XORed with B[16..31], the result is run
 * through four column/row iterations (rotation amounts 7, 9, 13, 18) and
 * the mixed values are added back onto B[0..15]. B[16..31] is only read.
 */
#if (CLSIZE == 64)
void salsaT(uint16 *B){
	uint16 s00,s01,s02,s03,s04,s05,s06,s07,s08,s09,s10,s11,s12,s13,s14,s15;
#elif (CLSIZE == 32)
void salsaT(uint8 *B){
	uint8 s00,s01,s02,s03,s04,s05,s06,s07,s08,s09,s10,s11,s12,s13,s14,s15;
#else
void salsaT(uint4 *B){
	uint4 s00,s01,s02,s03,s04,s05,s06,s07,s08,s09,s10,s11,s12,s13,s14,s15;
#endif

	/* Fold the upper half of the block into the lower half. */
	for(uint k = 0; k < 16; k++){
		B[k] ^= B[k + 16];
	}

	/* Working copy of the folded lower half. */
	s00 = B[ 0];  s01 = B[ 1];  s02 = B[ 2];  s03 = B[ 3];
	s04 = B[ 4];  s05 = B[ 5];  s06 = B[ 6];  s07 = B[ 7];
	s08 = B[ 8];  s09 = B[ 9];  s10 = B[10];  s11 = B[11];
	s12 = B[12];  s13 = B[13];  s14 = B[14];  s15 = B[15];

/* Rotate left by b bits; the (32 - b) term assumes 32-bit wide lanes. */
#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
/* One quarter-round: updates b, c, d and a in that order. */
#define SALSAT_QR(a, b, c, d) do { \
		b ^= R(a+d, 7); \
		c ^= R(b+a, 9); \
		d ^= R(c+b,13); \
		a ^= R(d+c,18); \
	} while(0)

	for(uint pass = 0; pass < 4; pass++){
		/* Column step: the four quarter-rounds touch disjoint words. */
		SALSAT_QR(s00, s04, s08, s12);
		SALSAT_QR(s05, s09, s13, s01);
		SALSAT_QR(s10, s14, s02, s06);
		SALSAT_QR(s15, s03, s07, s11);
		/* Row step. */
		SALSAT_QR(s00, s01, s02, s03);
		SALSAT_QR(s05, s06, s07, s04);
		SALSAT_QR(s10, s11, s08, s09);
		SALSAT_QR(s15, s12, s13, s14);
	}

#undef SALSAT_QR
#undef R

	/* Add the mixed working copy back onto the lower half. */
	B[ 0] += s00;  B[ 1] += s01;  B[ 2] += s02;  B[ 3] += s03;
	B[ 4] += s04;  B[ 5] += s05;  B[ 6] += s06;  B[ 7] += s07;
	B[ 8] += s08;  B[ 9] += s09;  B[10] += s10;  B[11] += s11;
	B[12] += s12;  B[13] += s13;  B[14] += s14;  B[15] += s15;
}


#endif