#ifndef TRANSFORM_H
#define TRANSFORM_H

// Transform: regroups the 32 vectors in X, in place, lane by lane.
//
// X[0..31] is first copied into the local snapshot Y. Every output vector is
// then built from ONE component ("lane") of several consecutive snapshot
// vectors: all lane-0 values of Y[0..31] in index order come first, then all
// lane-1 values, and so on, packed into X[0], X[1], ... with no gaps.
//
// The vector width is chosen at compile time through VECTORS16 / VECTORS8 /
// VECTORS4 / VECTORS2; when none of them is defined only the copy is done.
//
// Every gather reads from the snapshot Y and never from X, because X is
// being overwritten while the rows are produced.
//
// NOTE(review): X is subscripted like an array of 32 `vo`, so the parameter
// presumably has to be a pointer/array type; the declaration is kept exactly
// as found -- confirm against the definition of `vo` and the callers.
void Transform(vo X){
	vo Y[32];

	// Snapshot the input so the in-place rewrite below cannot read a slot
	// that has already been replaced.
	for(uint i=0; i<32; i++)
		Y[i] = X[i];

#ifdef VECTORS16
	// Lane `lane` of Y[0..15] -> X[dst], lane `lane` of Y[16..31] -> X[dst+1].
#define TRANSFORM_LANE(lane, dst) do { \
		X[(dst)] = (uint16)( \
			Y[0].s##lane, Y[1].s##lane, Y[2].s##lane, Y[3].s##lane, \
			Y[4].s##lane, Y[5].s##lane, Y[6].s##lane, Y[7].s##lane, \
			Y[8].s##lane, Y[9].s##lane, Y[10].s##lane, Y[11].s##lane, \
			Y[12].s##lane, Y[13].s##lane, Y[14].s##lane, Y[15].s##lane); \
		X[(dst) + 1] = (uint16)( \
			Y[16].s##lane, Y[17].s##lane, Y[18].s##lane, Y[19].s##lane, \
			Y[20].s##lane, Y[21].s##lane, Y[22].s##lane, Y[23].s##lane, \
			Y[24].s##lane, Y[25].s##lane, Y[26].s##lane, Y[27].s##lane, \
			Y[28].s##lane, Y[29].s##lane, Y[30].s##lane, Y[31].s##lane); \
	} while (0)
	TRANSFORM_LANE(0, 0);
	TRANSFORM_LANE(1, 2);
	TRANSFORM_LANE(2, 4);
	TRANSFORM_LANE(3, 6);
	TRANSFORM_LANE(4, 8);
	TRANSFORM_LANE(5, 10);
	TRANSFORM_LANE(6, 12);
	TRANSFORM_LANE(7, 14);
	TRANSFORM_LANE(8, 16);
	TRANSFORM_LANE(9, 18);
	TRANSFORM_LANE(a, 20);
	TRANSFORM_LANE(b, 22);
	TRANSFORM_LANE(c, 24);
	TRANSFORM_LANE(d, 26);
	TRANSFORM_LANE(e, 28);
	TRANSFORM_LANE(f, 30);
#undef TRANSFORM_LANE
#elif defined(VECTORS8)
	// Lane `lane` of Y[0..31] -> X[dst..dst+3], eight source vectors per row.
#define TRANSFORM_LANE(lane, dst) do { \
		X[(dst)] = (uint8)( \
			Y[0].s##lane, Y[1].s##lane, Y[2].s##lane, Y[3].s##lane, \
			Y[4].s##lane, Y[5].s##lane, Y[6].s##lane, Y[7].s##lane); \
		X[(dst) + 1] = (uint8)( \
			Y[8].s##lane, Y[9].s##lane, Y[10].s##lane, Y[11].s##lane, \
			Y[12].s##lane, Y[13].s##lane, Y[14].s##lane, Y[15].s##lane); \
		X[(dst) + 2] = (uint8)( \
			Y[16].s##lane, Y[17].s##lane, Y[18].s##lane, Y[19].s##lane, \
			Y[20].s##lane, Y[21].s##lane, Y[22].s##lane, Y[23].s##lane); \
		X[(dst) + 3] = (uint8)( \
			Y[24].s##lane, Y[25].s##lane, Y[26].s##lane, Y[27].s##lane, \
			Y[28].s##lane, Y[29].s##lane, Y[30].s##lane, Y[31].s##lane); \
	} while (0)
	TRANSFORM_LANE(0, 0);
	TRANSFORM_LANE(1, 4);
	TRANSFORM_LANE(2, 8);
	TRANSFORM_LANE(3, 12);
	TRANSFORM_LANE(4, 16);
	TRANSFORM_LANE(5, 20);
	TRANSFORM_LANE(6, 24);
	TRANSFORM_LANE(7, 28);
#undef TRANSFORM_LANE
#elif defined(VECTORS4)
	// Lane `lane` of Y[0..31] -> X[dst..dst+7], four source vectors per row.
#define TRANSFORM_LANE(lane, dst) do { \
		X[(dst)]     = (uint4)(Y[0].s##lane,  Y[1].s##lane,  Y[2].s##lane,  Y[3].s##lane); \
		X[(dst) + 1] = (uint4)(Y[4].s##lane,  Y[5].s##lane,  Y[6].s##lane,  Y[7].s##lane); \
		X[(dst) + 2] = (uint4)(Y[8].s##lane,  Y[9].s##lane,  Y[10].s##lane, Y[11].s##lane); \
		X[(dst) + 3] = (uint4)(Y[12].s##lane, Y[13].s##lane, Y[14].s##lane, Y[15].s##lane); \
		X[(dst) + 4] = (uint4)(Y[16].s##lane, Y[17].s##lane, Y[18].s##lane, Y[19].s##lane); \
		X[(dst) + 5] = (uint4)(Y[20].s##lane, Y[21].s##lane, Y[22].s##lane, Y[23].s##lane); \
		X[(dst) + 6] = (uint4)(Y[24].s##lane, Y[25].s##lane, Y[26].s##lane, Y[27].s##lane); \
		X[(dst) + 7] = (uint4)(Y[28].s##lane, Y[29].s##lane, Y[30].s##lane, Y[31].s##lane); \
	} while (0)
	TRANSFORM_LANE(0, 0);
	TRANSFORM_LANE(1, 8);
	TRANSFORM_LANE(2, 16);
	TRANSFORM_LANE(3, 24);
#undef TRANSFORM_LANE
#elif defined(VECTORS2)
	// Lane `lane` of Y[0..31] -> X[dst..dst+15], two source vectors per row.
#define TRANSFORM_LANE(lane, dst) do { \
		X[(dst)]      = (uint2)(Y[0].s##lane,  Y[1].s##lane); \
		X[(dst) + 1]  = (uint2)(Y[2].s##lane,  Y[3].s##lane); \
		X[(dst) + 2]  = (uint2)(Y[4].s##lane,  Y[5].s##lane); \
		X[(dst) + 3]  = (uint2)(Y[6].s##lane,  Y[7].s##lane); \
		X[(dst) + 4]  = (uint2)(Y[8].s##lane,  Y[9].s##lane); \
		X[(dst) + 5]  = (uint2)(Y[10].s##lane, Y[11].s##lane); \
		X[(dst) + 6]  = (uint2)(Y[12].s##lane, Y[13].s##lane); \
		X[(dst) + 7]  = (uint2)(Y[14].s##lane, Y[15].s##lane); \
		X[(dst) + 8]  = (uint2)(Y[16].s##lane, Y[17].s##lane); \
		X[(dst) + 9]  = (uint2)(Y[18].s##lane, Y[19].s##lane); \
		X[(dst) + 10] = (uint2)(Y[20].s##lane, Y[21].s##lane); \
		X[(dst) + 11] = (uint2)(Y[22].s##lane, Y[23].s##lane); \
		X[(dst) + 12] = (uint2)(Y[24].s##lane, Y[25].s##lane); \
		X[(dst) + 13] = (uint2)(Y[26].s##lane, Y[27].s##lane); \
		X[(dst) + 14] = (uint2)(Y[28].s##lane, Y[29].s##lane); \
		X[(dst) + 15] = (uint2)(Y[30].s##lane, Y[31].s##lane); \
	} while (0)
	TRANSFORM_LANE(0, 0);
	TRANSFORM_LANE(1, 16);
#undef TRANSFORM_LANE
#else
//shouldn't be here
#endif

}


// UnTransform: regroups the 32 vectors in X, in place, in the opposite
// direction to Transform.
//
// X[0..31] is copied into the local snapshot Y first. With W the vector width
// and S = 32 / W, output vector X[W*k + c] collects component c of the
// snapshot vectors Y[k], Y[k+S], Y[k+2S], ... (W of them), for k = 0..S-1 and
// c = 0..W-1.
//
// The width is chosen at compile time through VECTORS2 / VECTORS4 /
// VECTORS8 / VECTORS16; when none of them is defined only the copy is done.
void UnTransform(vo X){
	vo Y[32];

	// Work from a snapshot: X is overwritten while the rows are rebuilt.
	for(uint idx=0; idx<32; ++idx)
		Y[idx] = X[idx];

#if defined(VECTORS2)
	// One output row: component `lane` of Y[k] and Y[k+16].
#define UNTRANSFORM_ROW(k, lane, off) \
	X[2*(k) + (off)] = (uint2)(Y[(k)].s##lane, Y[(k) + 16].s##lane)
	// All rows that start at source vector k.
#define UNTRANSFORM_GROUP(k) do { \
		UNTRANSFORM_ROW(k, 0, 0); \
		UNTRANSFORM_ROW(k, 1, 1); \
	} while (0)
	UNTRANSFORM_GROUP(0);
	UNTRANSFORM_GROUP(1);
	UNTRANSFORM_GROUP(2);
	UNTRANSFORM_GROUP(3);
	UNTRANSFORM_GROUP(4);
	UNTRANSFORM_GROUP(5);
	UNTRANSFORM_GROUP(6);
	UNTRANSFORM_GROUP(7);
	UNTRANSFORM_GROUP(8);
	UNTRANSFORM_GROUP(9);
	UNTRANSFORM_GROUP(10);
	UNTRANSFORM_GROUP(11);
	UNTRANSFORM_GROUP(12);
	UNTRANSFORM_GROUP(13);
	UNTRANSFORM_GROUP(14);
	UNTRANSFORM_GROUP(15);
#undef UNTRANSFORM_GROUP
#undef UNTRANSFORM_ROW
#elif defined(VECTORS4)
	// One output row: component `lane` of Y[k], Y[k+8], Y[k+16], Y[k+24].
#define UNTRANSFORM_ROW(k, lane, off) \
	X[4*(k) + (off)] = (uint4)(Y[(k)].s##lane, Y[(k) + 8].s##lane, \
		Y[(k) + 16].s##lane, Y[(k) + 24].s##lane)
	// All rows that start at source vector k.
#define UNTRANSFORM_GROUP(k) do { \
		UNTRANSFORM_ROW(k, 0, 0); \
		UNTRANSFORM_ROW(k, 1, 1); \
		UNTRANSFORM_ROW(k, 2, 2); \
		UNTRANSFORM_ROW(k, 3, 3); \
	} while (0)
	UNTRANSFORM_GROUP(0);
	UNTRANSFORM_GROUP(1);
	UNTRANSFORM_GROUP(2);
	UNTRANSFORM_GROUP(3);
	UNTRANSFORM_GROUP(4);
	UNTRANSFORM_GROUP(5);
	UNTRANSFORM_GROUP(6);
	UNTRANSFORM_GROUP(7);
#undef UNTRANSFORM_GROUP
#undef UNTRANSFORM_ROW
#elif defined(VECTORS8)
	// One output row: component `lane` of Y[k], Y[k+4], ..., Y[k+28].
#define UNTRANSFORM_ROW(k, lane, off) \
	X[8*(k) + (off)] = (uint8)(Y[(k)].s##lane, Y[(k) + 4].s##lane, \
		Y[(k) + 8].s##lane, Y[(k) + 12].s##lane, \
		Y[(k) + 16].s##lane, Y[(k) + 20].s##lane, \
		Y[(k) + 24].s##lane, Y[(k) + 28].s##lane)
	// All rows that start at source vector k.
#define UNTRANSFORM_GROUP(k) do { \
		UNTRANSFORM_ROW(k, 0, 0); \
		UNTRANSFORM_ROW(k, 1, 1); \
		UNTRANSFORM_ROW(k, 2, 2); \
		UNTRANSFORM_ROW(k, 3, 3); \
		UNTRANSFORM_ROW(k, 4, 4); \
		UNTRANSFORM_ROW(k, 5, 5); \
		UNTRANSFORM_ROW(k, 6, 6); \
		UNTRANSFORM_ROW(k, 7, 7); \
	} while (0)
	UNTRANSFORM_GROUP(0);
	UNTRANSFORM_GROUP(1);
	UNTRANSFORM_GROUP(2);
	UNTRANSFORM_GROUP(3);
#undef UNTRANSFORM_GROUP
#undef UNTRANSFORM_ROW
#elif defined(VECTORS16)
	// One output row: component `lane` of Y[k], Y[k+2], ..., Y[k+30].
#define UNTRANSFORM_ROW(k, lane, off) \
	X[16*(k) + (off)] = (uint16)(Y[(k)].s##lane, Y[(k) + 2].s##lane, \
		Y[(k) + 4].s##lane, Y[(k) + 6].s##lane, \
		Y[(k) + 8].s##lane, Y[(k) + 10].s##lane, \
		Y[(k) + 12].s##lane, Y[(k) + 14].s##lane, \
		Y[(k) + 16].s##lane, Y[(k) + 18].s##lane, \
		Y[(k) + 20].s##lane, Y[(k) + 22].s##lane, \
		Y[(k) + 24].s##lane, Y[(k) + 26].s##lane, \
		Y[(k) + 28].s##lane, Y[(k) + 30].s##lane)
	// All rows that start at source vector k.
#define UNTRANSFORM_GROUP(k) do { \
		UNTRANSFORM_ROW(k, 0, 0); \
		UNTRANSFORM_ROW(k, 1, 1); \
		UNTRANSFORM_ROW(k, 2, 2); \
		UNTRANSFORM_ROW(k, 3, 3); \
		UNTRANSFORM_ROW(k, 4, 4); \
		UNTRANSFORM_ROW(k, 5, 5); \
		UNTRANSFORM_ROW(k, 6, 6); \
		UNTRANSFORM_ROW(k, 7, 7); \
		UNTRANSFORM_ROW(k, 8, 8); \
		UNTRANSFORM_ROW(k, 9, 9); \
		UNTRANSFORM_ROW(k, a, 10); \
		UNTRANSFORM_ROW(k, b, 11); \
		UNTRANSFORM_ROW(k, c, 12); \
		UNTRANSFORM_ROW(k, d, 13); \
		UNTRANSFORM_ROW(k, e, 14); \
		UNTRANSFORM_ROW(k, f, 15); \
	} while (0)
	UNTRANSFORM_GROUP(0);
	UNTRANSFORM_GROUP(1);
#undef UNTRANSFORM_GROUP
#undef UNTRANSFORM_ROW
#else
//vector1 shouldn't call this function
#endif

}

#endif