#ifndef SCRYPT
#define SCRYPT 1
#include "globals.h"
#include "salsa.h"
#include "shittify.h"

/*
 * scrypt_core - run the scratchpad fill / scratchpad mix loops on the state X.
 *
 * X      : state of the calling work-item, modified in place. It is two
 *          uint16 vectors when CLSIZE == 64, otherwise four uint8 vectors
 *          (elements X[0]..X[3] are accessed).
 * lookup : scratchpad in __global memory. Every access goes through one of the
 *          CO / CO_0 / CO_1 / CO_W0 / CO_W1 macros, which are defined outside
 *          this file; presumably they build the index from the locals x, y,
 *          z and xSIZE declared below - TODO confirm against globals.h.
 *
 * Steps:
 *   1. Reorder X (lane swizzle + EndianSwapa for CLSIZE == 64, shittify()
 *      otherwise).
 *   2. Write portion: nfact/LOOKUP_GAP times, store X into lookup and then
 *      advance X with salsa().
 *   3. Read portion: nfact times, derive an index from X, XOR the matching
 *      scratchpad entry (re-advanced with salsa() when the index falls between
 *      stored entries) into X, then advance X with salsa().
 *   4. Undo the reordering from step 1.
 *
 * NOTE(review): salsa() looks like a macro or overloaded helper taking an
 * optional second argument; from the generic remainder loop below,
 * salsa(X, zero) appears to be a single step, so salsa(X, k) presumably
 * performs k+1 steps - confirm in salsa.h.
 */
#if (CLSIZE == 64)
//void scrypt_core(__global uint16 *restrict lookup){
void scrypt_core(uint16 *X, __global uint16 *restrict lookup){
#else
void scrypt_core(uint8 *X, __global uint8 *restrict lookup){
#endif
	// Presumably declares the iteration count `nfact` used throughout - see globals.h.
	DEFNFACTOR(nfact)

//#if (LOOKUP_GAP != 2)
	// Step of the write loop below; also likely consumed by the CO* macros.
	const uint xSIZE = CONCURRENT_THREADS;
//#endif

	// Work-item slot in [0, CONCURRENT_THREADS). Not referenced by name in this
	// function; presumably read by the CO* macros - TODO confirm.
	uint x = (get_global_id(0)%CONCURRENT_THREADS);

#if (CLSIZE == 64)

	// Exclusive upper bound for the write loop, whose counter advances by xSIZE,
	// giving nfact/LOOKUP_GAP iterations.
	const uint ub = CONCURRENT_THREADS*(nfact/LOOKUP_GAP);
	// NOTE(review): idx is not referenced in the visible code; it may be used by
	// a macro or be a leftover - confirm before removing.
	uint idx = 0;

	// Permute the 16 lanes of each vector and pass them through EndianSwapa.
	// The swizzle applied at the end of the function (sc9630da741eb852f) is the
	// inverse of this lane permutation.
	X[0] = EndianSwapa(X[0].s49e38d27c16b05af);
	X[1] = EndianSwapa(X[1].s49e38d27c16b05af);
	//uint16 X[2];

	// Earlier variants that loaded the state from lookup instead of taking it
	// as a parameter; kept for reference.
//	uint16 X[2] = {EndianSwapa(lookup[(x<<1)].s49e38d27c16b05af), EndianSwapa(lookup[(x<<1)+1].s49e38d27c16b05af)};
	//X[0] = EndianSwapa(lookup[(x<<1)].s49e38d27c16b05af);
	//X[1] = EndianSwapa(lookup[(x<<1)+1].s49e38d27c16b05af);

//X[0] = lookup[(x<<1)];
//X[1] = lookup[(x<<1)+1];

//X[0] = EndianSwapa(X[0].s49e38d27c16b05af);
//X[1] = EndianSwapa(X[1].s49e38d27c16b05af);




#else
	//const uint xSIZE = CONCURRENT_THREADS;
	// Reorder X for the uint8 layout; undone by unshittify() at the end.
	shittify(X);
#endif

// write portion: store X into the scratchpad, then advance it.
#if (CLSIZE == 64)
	// y advances by xSIZE per iteration, so it is already scaled by the thread
	// count; CO_W0/CO_W1 presumably use it as-is, unlike CO_0/CO_1 below which
	// are used with an unscaled y - TODO confirm.
	for(uint y=0; y<ub; y+=xSIZE){
		lookup[CO_W0] = X[0];
		lookup[CO_W1] = X[1];
#else
	for(uint y=0; y<(nfact/LOOKUP_GAP); ++y){
		// Store all four vectors of X; CO presumably depends on y and z.
		for(uint z=0; z<4; z++)
			lookup[CO] = X[z];
#endif

	// Advance X between two stored entries (presumably by LOOKUP_GAP steps).
#if (LOOKUP_GAP == 2)
		//salsadb(X);
		salsa(X, one);
#elif (LOOKUP_GAP == 1)
		salsa(X);
#else
		salsa(X, LOOKUP_GAP-one);
#endif
	}

	// For gaps other than 1, 2, 4 and 8: store one more entry at index
	// nfact/LOOKUP_GAP and advance X by the remaining steps.
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
	{
        uint y = (nfact/LOOKUP_GAP);
# if (CLSIZE == 64)
		lookup[CO_0] = X[0];
		lookup[CO_1] = X[1];
# else
		for(uint z=0; z<4; z++)
			lookup[CO] = X[z];
# endif

	// NOTE(review): the fixed arguments for gaps 3, 5 and 7 equal
	// (nfact % LOOKUP_GAP) - 1 when nfact is 1024 (1024 % 3 = 1, % 5 = 4,
	// % 7 = 2). If nfact can take other values these branches would disagree
	// with the generic loop in the last branch - confirm.
# if (LOOKUP_GAP == 3)
		salsa(X, zero);
# elif (LOOKUP_GAP == 5)
		salsa(X, three);
# elif (LOOKUP_GAP == 7)
		salsa(X, one);
# else
		// Generic case: one salsa call per leftover step.
		for(uint i=0; i<nfact%LOOKUP_GAP; ++i)
			salsa(X, zero);
# endif
	}

#endif
// end write portion

// read portion
	for(uint i=0; i<nfact; i++){

	// Derive the scratchpad index from X:
	//   j = one lane of the last vector of X, masked with nfact-1 (the mask
	//       covers the full range 0..nfact-1 only if nfact is a power of two);
	//   y = j / LOOKUP_GAP, the stored entry at or before position j.
	// With LOOKUP_GAP == 1 the masked value is used directly as y.
#if (CLSIZE == 64)

# if (LOOKUP_GAP == 2)
		uint j = X[1].sc & (nfact-1);
		uint y = (j>>1);
# elif (LOOKUP_GAP == 4)
		uint j = X[1].sc & (nfact-1);
		uint y = (j>>2);
# elif (LOOKUP_GAP == 8)
		uint j = X[1].sc & (nfact-1);
		uint y = (j>>3);
# elif (LOOKUP_GAP != 1)
		uint j = X[1].sc & (nfact-1);
		uint y = (j/LOOKUP_GAP);
# else
		uint y = X[1].sc & (nfact-1);
# endif

#else

# if (LOOKUP_GAP == 2)
        uint j = X[3].s4 & (nfact-1);
        uint y = (j>>1);
# elif (LOOKUP_GAP == 4)
        uint j = X[3].s4 & (nfact-1);
        uint y = (j>>2);
# elif (LOOKUP_GAP == 8)
        uint j = X[3].s4 & (nfact-1);
        uint y = (j>>3);
# elif (LOOKUP_GAP != 1)
        uint j = X[3].s4 & (nfact-1);
        uint y = (j/LOOKUP_GAP);
# else
        uint y = X[3].s4 & (nfact-1);
# endif

#endif

#if (LOOKUP_GAP == 2)
		// Odd j: the wanted entry was not stored. Load the stored entry into a
		// private copy V, advance it with one salsa call, then XOR it into X.
		if(j&1){
# if (CLSIZE == 64)
			uint16 V[2] = {lookup[CO_0], lookup[CO_1]};
			//salsa(V);
			salsa(V, zero);
			X[0] ^= V[0];
			X[1] ^= V[1];
# else
			uint8 V[4];
			for(uint z=0; z<4; z++)
				V[z] = lookup[CO];
			salsa(V, zero);
			for(uint z=0; z<4; z++)
				X[z] ^= V[z];
# endif

		}else{
			// Even j: the stored entry is the wanted one; XOR it in directly.
# if (CLSIZE == 64)
			X[0] ^= lookup[CO_0];
			X[1] ^= lookup[CO_1];
# else
			for(uint z=0; z<4; z++)
				X[z] ^= lookup[CO];
# endif
		}
		// Advance X after the XOR.
		salsa(X, zero);
		//salsa(X);
#elif (LOOKUP_GAP != 1)
		// Reduce j to its offset past the stored entry, i.e. j % LOOKUP_GAP.
		j -= y*LOOKUP_GAP;
		if(j){
			// Non-zero offset: advance a private copy of the stored entry with
			// salsa(V, j-one) (presumably j steps) before XORing it into X.
# if (CLSIZE == 64)
			uint16 V[2] = {lookup[CO_0], lookup[CO_1]};
			salsa(V, j-one);
			X[0] ^= V[0];
			X[1] ^= V[1];
# else
			uint8 V[4];
			for(uint z=0; z<4; z++)
				V[z] = lookup[CO];
			salsa(V, j-one);
			for(uint z=0; z<4; z++)
				X[z] ^= V[z];
# endif

		}else{
			// Zero offset: the stored entry is used as-is.
# if (CLSIZE == 64)
			X[0] ^= lookup[CO_0];
			X[1] ^= lookup[CO_1];
# else
			for(uint z=0; z<4; z++)
				X[z] ^= lookup[CO];
# endif
		}
		// Advance X after the XOR.
		salsa(X, zero);
#else

		// LOOKUP_GAP == 1: the entry at y is XORed in directly, with no
		// recomputation step.
# if (CLSIZE == 64)
		X[0] ^= lookup[CO_0];
		X[1] ^= lookup[CO_1];
# else
		for(uint z=0; z<4; z++)
			X[z] ^= lookup[CO];
# endif

		salsa(X);
#endif

	}
// end read portion

#if (CLSIZE == 64)
	// Inverse of the lane permutation applied at the top of the function,
	// again combined with EndianSwapa.
	X[0] = EndianSwapa(X[0].sc9630da741eb852f);
	X[1] = EndianSwapa(X[1].sc9630da741eb852f);

//uint16 X[2] = {EndianSwapa(lookup[(x<<1)]).s49e38d27c16b05af, EndianSwapa(lookup[(x<<1)+1]).s49e38d27c16b05af};

//lookup[(x<<1)] = X[0];
//lookup[(x<<1)+1] = X[1];
#else
	// Counterpart of the shittify() call at the top of the function.
	unshittify(X);
#endif


}

#endif