#ifndef SCRYPT
#define SCRYPT 1

/*
 * scrypt_core - memory-hard mixing stage operating on the state block X.
 *
 * X       state block, updated in place: two uint16 vectors when CLSIZE == 64,
 *         otherwise four uint8 vectors.
 * lookup  __global scratch table shared by the work-items; only every
 *         LOOKUP_GAP-th intermediate state is stored in it, and the skipped
 *         states are recomputed with salsa() during the read portion.
 *
 * The function has two phases:
 *   write portion - stores X into lookup and advances X with salsa(),
 *                   nfact/LOOKUP_GAP times (plus one tail store for gaps other
 *                   than 1, 2, 4 and 8);
 *   read portion  - nfact iterations; each derives an index j from the current
 *                   state, fetches (and if needed re-advances) the stored
 *                   state, XORs it into X and calls salsa() on X.
 *
 * DEFNFACTOR, CO, CO_0, CO_1, CO_W0, CO_W1, salsa, EndianSwapa and the
 * constants one/zero/three are defined outside this file section.
 * NOTE(review): the CO* macros presumably expand to index expressions built
 * from the locals x, y, z and xSIZE (and possibly idx) - confirm before
 * renaming or removing any local variable in this function.
 * NOTE(review): salsa(X, k) presumably performs k+1 mixing passes, judging by
 * the arguments chosen per LOOKUP_GAP below - confirm against its definition.
 */
#if (CLSIZE == 64)
void scrypt_core(uint16 *X, __global uint16 *restrict lookup){
#else
void scrypt_core(uint8 *X, __global uint8 *restrict lookup){
#endif
	// Presumably declares/defines nfact (the iteration count N) - confirm against the macro.
	DEFNFACTOR(nfact)
	const uint xSIZE = CONCURRENT_THREADS;
	// Work-item id folded into the range [0, CONCURRENT_THREADS).
	uint x = (get_global_id(0)%CONCURRENT_THREADS);

#if (CLSIZE == 64)
	// Upper bound of the write loop: y advances in steps of xSIZE, so the
	// loop still runs nfact/LOOKUP_GAP times.
	const uint ub = xSIZE*(nfact/LOOKUP_GAP);
	// Not referenced by name in the visible code; may be used by a macro - TODO confirm.
	uint idx = 0;
	// Permute the 16 lanes of each half and pass them through EndianSwapa.
	// The swizzle applied at the end of the function is the inverse permutation.
    X[0] = EndianSwapa(X[0].s49e38d27c16b05af);
    X[1] = EndianSwapa(X[1].s49e38d27c16b05af);

// Disabled experiment: probing lookup for a free slot before use.
//barrier(CLK_GLOBAL_MEM_FENCE);
//if(lookup[(get_global_id(0)%CONCURRENT_THREADS)<<1].s0 != 0)
//	x = ((1+get_global_id(0))%CONCURRENT_THREADS);

//for(uint i=1; lookup[x<<1].s0 != 0; i++){
	//idx++;
//	x = ((i+get_global_id(0))%CONCURRENT_THREADS);
//}
//lookup[x<<1].s0 = 1;
//lookup(idx=(y+x)<<1)

// Disabled alternatives: other EndianSwapa/EndianSwapb combinations.
//    X[0] = EndianSwapb(X[0].s49e38d27c16b05af);
//    X[1] = EndianSwapb(X[1].s49e38d27c16b05af);


//    X[0] = EndianSwapa(X[0].s49e38d27c16b05af);
//    X[1] = EndianSwapb(X[1].s49e38d27c16b05af);

//    X[0] = EndianSwapb(X[0].s49e38d27c16b05af);
//    X[1] = EndianSwapa(X[1].s49e38d27c16b05af);

#endif
	//uint x = (get_global_id(0)%CONCURRENT_THREADS);




// write portion: store the current state, then advance it by LOOKUP_GAP steps.
#if (CLSIZE == 64)
	// y is pre-multiplied by xSIZE here, hence the CO_W* macros below
	// rather than the CO_0/CO_1 used where y is a plain row number.
	for(uint y=0; y<ub; y+=xSIZE){
#else
	for(uint y=0; y<(nfact/LOOKUP_GAP); ++y){
#endif
#if (CLSIZE == 64)
		lookup[CO_W0] = X[0];
		lookup[CO_W1] = X[1];
#else
		for(uint z=0; z<4; z++)
			lookup[CO] = X[z];
#endif

		// Advance X; the salsa() form and argument depend on LOOKUP_GAP.
#if (LOOKUP_GAP == 2)
		salsa(X, one);
#elif (LOOKUP_GAP == 1)
		salsa(X);
#else
		salsa(X, LOOKUP_GAP-one);
#endif
	}

// Tail of the write portion for gaps other than 1, 2, 4 and 8: one more state
// is stored at row nfact/LOOKUP_GAP and X is advanced by the leftover steps.
// NOTE(review): this also runs for gaps that divide nfact evenly (e.g. 16),
// storing one extra row - confirm the lookup buffer is sized for it.
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
        uint y = (nfact/LOOKUP_GAP);
# if (CLSIZE == 64)
		lookup[CO_0] = X[0];
		lookup[CO_1] = X[1];
# else
		//idx = ((y*xSIZE+x)<<2);
		for(uint z=0; z<4; z++)
			lookup[CO] = X[z];
# endif

		// NOTE(review): the hard-coded arguments for gaps 3, 5 and 7 look like
		// (nfact % LOOKUP_GAP) - 1 for nfact == 1024 (1024%3 == 1, 1024%5 == 4,
		// 1024%7 == 2) - confirm they are still right if nfact can differ.
# if (LOOKUP_GAP == 3)
		salsa(X, zero);
# elif (LOOKUP_GAP == 5)
		salsa(X, three);
# elif (LOOKUP_GAP == 7)
		salsa(X, one);
# else
		for(uint i=0; i<nfact%LOOKUP_GAP; ++i)
			salsa(X, zero);
# endif


#endif
// end write portion

//barrier(CLK_GLOBAL_MEM_FENCE);

// read portion
	for(uint i=0; i<nfact; i++){

		// Derive the index j from one lane of the state (masked with nfact-1,
		// which assumes nfact is a power of two) and the stored row y = j / LOOKUP_GAP.
		// With LOOKUP_GAP == 1 every state is stored, so y is the index itself.
#if (CLSIZE == 64)

# if (LOOKUP_GAP == 2)
		uint j = X[1].sc & (nfact-1);
		uint y = (j>>1);
# elif (LOOKUP_GAP == 4)
		uint j = X[1].sc & (nfact-1);
		uint y = (j>>2);
# elif (LOOKUP_GAP == 8)
		uint j = X[1].sc & (nfact-1);
		uint y = (j>>3);
# elif (LOOKUP_GAP != 1)
		uint j = X[1].sc & (nfact-1);
		uint y = (j/LOOKUP_GAP);
# else
		uint y = X[1].sc & (nfact-1);
# endif

#else

# if (LOOKUP_GAP == 2)
        uint j = X[3].s4 & (nfact-1);
        uint y = (j>>1);
# elif (LOOKUP_GAP == 4)
        uint j = X[3].s4 & (nfact-1);
        uint y = (j>>2);
# elif (LOOKUP_GAP == 8)
        uint j = X[3].s4 & (nfact-1);
        uint y = (j>>3);
# elif (LOOKUP_GAP != 1)
        uint j = X[3].s4 & (nfact-1);
        uint y = (j/LOOKUP_GAP);
# else
        uint y = X[3].s4 & (nfact-1);
# endif

#endif

#if (LOOKUP_GAP == 2)
		// Odd j: the wanted state was not stored; load its predecessor into V,
		// advance V with salsa(), then XOR it into X.
		if(j&1){
# if (CLSIZE == 64)
			uint16 V[2] = {lookup[CO_0], lookup[CO_1]};
			salsa(V, zero);
			X[0] ^= V[0];
			X[1] ^= V[1];
# else
			uint8 V[4];
			for(uint z=0; z<4; z++)
				V[z] = lookup[CO];
			salsa(V, zero);
			for(uint z=0; z<4; z++)
				X[z] ^= V[z];
# endif

		}else{
			// Even j: the stored state is the wanted one; XOR it in directly.
# if (CLSIZE == 64)
			X[0] ^= lookup[CO_0];
			X[1] ^= lookup[CO_1];
# else
			for(uint z=0; z<4; z++)
				X[z] ^= lookup[CO];
# endif
		}
		salsa(X, zero);

#elif (LOOKUP_GAP != 1)
		// j becomes the remainder j % LOOKUP_GAP: the number of steps the stored
		// state lags behind the wanted one.
		j -= y*LOOKUP_GAP;
		if(j){
			// Non-zero remainder: advance a private copy V of the stored state
			// with salsa(V, j-one), then XOR it into X.
# if (CLSIZE == 64)
			uint16 V[2] = {lookup[CO_0], lookup[CO_1]};
			salsa(V, j-one);
			X[0] ^= V[0];
			X[1] ^= V[1];
# else
			uint8 V[4];
			for(uint z=0; z<4; z++)
				V[z] = lookup[CO];
			salsa(V, j-one);
			for(uint z=0; z<4; z++)
				X[z] ^= V[z];
# endif

		}else{
			// Zero remainder: the stored state is the wanted one.
# if (CLSIZE == 64)
			X[0] ^= lookup[CO_0];
			X[1] ^= lookup[CO_1];
# else
			for(uint z=0; z<4; z++)
				X[z] ^= lookup[CO];
# endif
		}
		salsa(X, zero);
#else

		// LOOKUP_GAP == 1: every state is stored, so no recomputation is needed.
# if (CLSIZE == 64)
		X[0] ^= lookup[CO_0];
		X[1] ^= lookup[CO_1];
# else
		for(uint z=0; z<4; z++)
			X[z] ^= lookup[CO];
# endif

		salsa(X);
#endif

	}
// end read portion

#if (CLSIZE == 64)
	// Apply the inverse of the lane permutation used on entry, again followed
	// by EndianSwapa.
	X[0] = EndianSwapa(X[0].sc9630da741eb852f);
	X[1] = EndianSwapa(X[1].sc9630da741eb852f);
// Disabled experiment: releasing the slot marker in lookup.
//lookup[x<<1].s0 = 0;

// Disabled alternatives: other EndianSwapa/EndianSwapb combinations.
//    X[0] = EndianSwapb(X[0].sc9630da741eb852f);
//    X[1] = EndianSwapb(X[1].sc9630da741eb852f);


//    X[0] = EndianSwapa(X[0].sc9630da741eb852f);
//    X[1] = EndianSwapb(X[1].sc9630da741eb852f);


//    X[0] = EndianSwapb(X[0].sc9630da741eb852f);
//    X[1] = EndianSwapa(X[1].sc9630da741eb852f);

#endif


}

#endif