/*-
 * Copyright 2009 Colin Percival, 2011 ArtForz, 2011 pooler, 2012 mtrlt,
 * 2012-2013 Con Kolivas.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file was originally written by Colin Percival as part of the Tarsnap
 * online backup system.
 *
 * V1.3 modified by sterling pickens linuxsociety.org 2014
 */

#include "constants.cl"
#include "sha256.cl"
#include "salsa.cl"
#include "scryptcore.cl"


//__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
//__kernel void search(__global const uint4 *restrict input,
//volatile __global uint *restrict output, __global uint4 *restrict padcache, constant uint4 midstate0, constant uint4 midstate16, const uint target,
//const __global uint4 *const_masks, __local uint4 *lbuff){
//__local uint *lbuff, __global uint *restrict gbuff){



/*
 * search - kernel entry point; each work-item evaluates one candidate and
 * reports it when the final comparison against `target` succeeds.
 *
 * The body is built almost entirely from macros that are not defined in this
 * file (DecAllSK, FAcpy, FAcpyi, FAxor, FAcpyoffset, SHA256, EndianSwapa,
 * SETFOUND); presumably they come from the included .cl files. Comments below
 * that describe what a macro does are assumptions based on its name and on
 * the commented-out uint4 code it replaced -- confirm against the macro
 * definitions.
 *
 * Parameters:
 *   input      - read via `input[4]` and via FAcpyoffset at offsets 0/4/8/12.
 *   output     - not referenced directly in this body; presumably written by
 *                SETFOUND -- TODO confirm.
 *   padcache   - only referenced by the disabled scrypt_core() call below, so
 *                it is unused by the live code.
 *   midstate0,
 *   midstate16 - copied into pad0 / pad1 as the starting values for the first
 *                SHA256 invocation.
 *   target     - a candidate is reported when
 *                EndianSwapa(ostate1[3]) <= target.
 *
 * NOTE(review): the signature declares `input` as a pointer to scalar `uint`
 * and `midstate0` / `midstate16` as scalar `uint`, yet the initialisers of
 * data/pad0/pad1 use vector component access (.x/.y/.z/.w) on them. The
 * commented-out declarations show the earlier uint4 form. This looks like an
 * unfinished uint4 -> uint conversion; confirm the kernel builds and that the
 * host passes arguments of the declared sizes.
 */
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uint *restrict input,
volatile __global uint *restrict output, __global uint *restrict padcache, const uint midstate0, const uint midstate16, const uint target){
	// One candidate per work-item, identified by its global id in dimension 0.
	uint gid = get_global_id(0);

	// X is only referenced inside the disabled section further down.
	uint X[32];
	uint tmp0[4];
	uint tmp1[4];
	//uint tmp2[4];
	uint tstate0[4];
	uint tstate1[4];
	uint ostate0[4];
	uint ostate1[4];
	// pass0..pass7 are scratch 4-word arrays handed to the SHA256 macro.
	uint pass0[4];
	uint pass1[4];
	uint pass2[4];
	uint pass3[4];
	uint pass4[4];

	uint pass5[4];
	uint pass6[4];
	uint pass7[4];

	// Three words taken from input[4] followed by this work-item's id
	// (presumably the nonce being tried -- TODO confirm).
	uint data[4] = {input[4].x,input[4].y,input[4].z,gid};
	uint pad0[4] = {midstate0.x,midstate0.y,midstate0.z,midstate0.w};
	uint pad1[4] = {midstate16.x,midstate16.y,midstate16.z,midstate16.w};


    //uint4 tstate0, tstate1, ostate0, ostate1, tmp0, tmp1;
    //uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid);
    //uint4 pad0 = midstate0, pad1 = midstate16;



	//uint data[4];
	//uint pad0[4];
	//uint pad1[4];

	// Presumably declares the SK00..SK05 constants used below -- TODO confirm.
	DecAllSK

	// First hash: state taken from pad0/pad1, message words are
	// data, {SK02,0,0,0}, {0,0,0,0}, {0,0,0,SK03}.
	FAcpyi(pass2,SK02,0,0,0)
	FAcpyi(pass3,0,0,0,0)
	FAcpyi(pass4,0,0,0,SK03)
	FAcpy(pass0, pad0)
	FAcpy(pass1, pad1)

	SHA256(pass0, pass1, data, pass2, pass3, pass4, 1);
	// Keep the result in pad0/pad1; it is XORed with SK00 and SK01 below.
	FAcpy(pad0, pass0)
	FAcpy(pad1, pass1)
	FAxor(tmp0, pad0, SK00)
	FAxor(tmp1, pad1, SK00)
	// NOTE(review): ostate0/ostate1 have no visible assignment before this
	// point. This is harmless only if SHA256 with a final argument of 0 ignores
	// the incoming state (as the old SHA256_fresh presumably did) -- confirm.
	FAcpy(pass0, ostate0)
	FAcpy(pass1, ostate1)
	FAcpyi(pass2, SK00, SK00, SK00, SK00)


	// Second hash: message is (pad ^ SK00), SK00, SK00; result becomes ostate.
	SHA256(pass0, pass1, tmp0, tmp1, pass2, pass2, 0);
	FAcpy(ostate0, pass0)
	FAcpy(ostate1, pass1)
	//FAcopy(pass0, pad0)
	//FAcopy(pass1, pad1)
	FAxor(tmp0, pad0, SK01)
	FAxor(tmp1, pad1, SK01)
	// NOTE(review): same concern as above -- tstate0/tstate1 have no visible
	// assignment before being copied here.
	FAcpy(pass0, tstate0)
	FAcpy(pass1, tstate1)
//pass5 = SK01;
FAcpyi(pass5, SK01, SK01, SK01, SK01)

	// Third hash: message is (pad ^ SK01), SK01, SK01; result becomes tstate
	// and is also saved in tmp0/tmp1.
	SHA256(pass0, pass1, tmp0, tmp1, pass5, pass5, 0);
	FAcpy(tstate0, pass0)
	FAcpy(tstate1, pass1)
	FAcpy(tmp0, pass0)
	FAcpy(tmp1, pass1)
	//FAcpy(, pass1)
	//FAcpy(tmp1, pass1)
// Load four 4-word groups from input at offsets 0, 4, 8 and 12
// (assumes FAcpyoffset(dst, dstoff, src, srcoff) -- TODO confirm).
FAcpyoffset(pass2, 0, input, 0)
FAcpyoffset(pass5, 0, input, 4)
FAcpyoffset(pass6, 0, input, 8)
FAcpyoffset(pass7, 0, input, 12)

//pass2 = input;
//pass5 = input[4];
//pass6 = input[8];
//pass7 = input[12];

	// Fourth hash: continues from the third hash's result (still in
	// pass0/pass1) over the 16 input words loaded above; result becomes tstate.
	SHA256(pass0, pass1, pass2, pass5, pass6, pass7, 1);
	FAcpy(tstate0, pass0)
	FAcpy(tstate1, pass1)

// Everything from here to the closing comment marker is disabled: the loop
// that fills X, the scrypt_core(X, padcache) call and the final hashing that
// would overwrite ostate. With it disabled, the comparison at the end of the
// kernel sees the ostate1 produced by the second hash above.
/*
	for(uint i=0; i<4; i++){
		FAcpy(pad0, tstate0)
		FAcpy(pad1, tstate1)
		//FAcopy(pass0, ostate0)
		//FAcopy(pass1, ostate1)
		//writing 8 uints per pass
		FAcpyoffset(X, i<<3, ostate0, 0)
		FAcpyoffset(X, (i<<3)+4, ostate1, 0)
		//FAcopy(pass0, pad0)
		//FAcopy(pass1, pad1)
		//FAcopy(pass2, data)
		FAcpyi(pass0, i+1, SK02, 0, 0)
		FAcpyi(pass1, 0, 0, 0, SK04)


		SHA256(pad0, pad1, data, pass0, pass3, pass1, 1);
		//FAcopy(pad0, pass0)
		//FAcopy(pad1, pass1)
		FAcpyi(pass0, SK02, 0U, 0U, 0U)
		FAcpyi(pass1, 0U, 0U, 0U, SK05)
		SHA256((X+(i<<3)),(X+(i<<3)+4), pad0, pad1, pass0, pass1, 1);
	}

	scrypt_core(X,padcache);

FAcpyoffset(pass2, 0, X, 0)
FAcpyoffset(pass5, 0, X, 4)
FAcpyoffset(pass6, 0, X, 8)
FAcpyoffset(pass7, 0, X, 12)

	SHA256(tmp0, tmp1, pass2, pass5, pass6, pass7, (const uint *)1);

FAcpyoffset(pass2, 0, X, 16)
FAcpyoffset(pass5, 0, X, 20)
FAcpyoffset(pass6, 0, X, 24)
FAcpyoffset(pass7, 0, X, 28)
	SHA256(tmp0, tmp1, pass2, pass5, pass6, pass7, 1);

	FAcpy(tstate0, tmp0)
	FAcpy(tstate1, tmp1)


#define A tstate0[0]
#define B tstate0[1]
#define C tstate0[2]
#define D tstate0[3]
#define E tstate1[0]
#define F tstate1[1]
#define G tstate1[2]
#define H tstate1[3]

	for(uint i=0; i<8; i++){
		RND(A,B,C,D,E,F,G,H, fixedWa[i]);
		RND(H,A,B,C,D,E,F,G, fixedWb[i]);
		RND(G,H,A,B,C,D,E,F, fixedWc[i]);
		RND(F,G,H,A,B,C,D,E, fixedWd[i]);
		RND(E,F,G,H,A,B,C,D, fixedWe[i]);
		RND(D,E,F,G,H,A,B,C, fixedWf[i]);
		RND(C,D,E,F,G,H,A,B, fixedWg[i]);
		RND(B,C,D,E,F,G,H,A, fixedWh[i]);
	}

#undef A
#undef B
#undef C
#undef D
#undef E
#undef F
#undef G
#undef H

	//FAcopy(pass0, tstate0)
	//FAcopy(pass1, tstate1)
	FAadd(tmp0, tstate0, tmp0)
	FAadd(tmp1, tstate1, tmp1)

	//FAcopy(pass2, ostate0)
	//FAcopy(pass0, ostate1)

	FAcpyi(pass0, SK02, 0U, 0U, 0U)
	FAcpyi(pass1, 0U, 0U, 0U, SK05)

	//pass0 = (SK02, 0U, 0U, 0U);
	//pass1 = (0U, 0U, 0U, SK05);
	SHA256(ostate0, ostate1, tmp0, tmp1, pass0,pass1, 1);
	//FAcopy(ostate1, pass0)
*/
	// Report this work-item's id when the byte-swapped last state word does not
	// exceed the target (SETFOUND presumably records gid in `output`).
	bool result = (EndianSwapa(ostate1[3]) <= target);
	if (result)
		SETFOUND(gid);
}
