/*-
 * Copyright 2009 Colin Percival, 2011 ArtForz, 2011 pooler, 2012 mtrlt,
 * 2012-2013 Con Kolivas.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file was originally written by Colin Percival as part of the Tarsnap
 * online backup system.
 *
 * V1.3 modified by sterling pickens linuxsociety.org 2014
 */

#include "constants.cl"
#include "sha256.cl"
#include "salsa.cl"
#include "scryptcore.cl"


__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uint4 *restrict input,
volatile __global uint *restrict output, __global uint4 *restrict padcache, const uint4 midstate0, const uint4 midstate16, const uint target){


//const __global uint4 *const_masks, __local uint4 *lbuff){
//__local uint *lbuff, __global uint *restrict gbuff){
	uint gid = get_global_id(0);
	//uint4 gid = 
	//FOURdeclare(gid, get_global_id(0));
	uint4 X[8];
	uint4 tmp0, tmp1;
	uint4 pass0, pass1, pass2;
	//FOURsetup
	FOURdeclare(tstate0, 0, 0, 0, 0);
	FOURdeclare(tstate1, 0, 0, 0, 0);
	FOURdeclare(ostate0, 0, 0, 0, 0);
	FOURdeclare(ostate1, 0, 0, 0, 0);

	//getComponent(a, index, const_masks);
	//getComponent(input[4], 0, const_masks);

//	event_t e = async_work_group_copy(lbuff, const_masks, 4, 0);
//	wait_group_events(1, &e);

//	FOURdeclare(data, getComponent(input[4], 0, lbuff), getComponent(input[4], 1, lbuff), getComponent(input[4], 2, lbuff), gid);
//	FOURdeclare(pad0, getComponent(midstate0, 0, lbuff), getComponent(midstate0, 1, lbuff), getComponent(midstate0, 2, lbuff), getComponent(midstate0, 3, lbuff));
//	FOURdeclare(pad1, getComponent(midstate16, 0, lbuff), getComponent(midstate16, 1, lbuff), getComponent(midstate16, 2, lbuff), getComponent(midstate16, 3, lbuff));

	//FOURdecfromvec(data, input[4]);
	FOURdecfromvec(pad0, midstate0);
	FOURdecfromvec(pad1, midstate16);
	FOURdeclare(data, input[4].x,input[4].y,input[4].z,gid);

	//FOURdeclare(pad0, midstate0.x, midstate0.y, midstate0.z, midstate0.w);
	//FOURdeclare(pad1, midstate16.x, midstate16.y, midstate16.z, midstate16.w);
	DecAllSK
	FOURtovec(pass0, pad0);
	FOURtovec(pass1, pad1);
	FOURtovec(pass2, data);

	SHA256(&pass0,&pass1, pass2, (uint4)(SK02,0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, SK03), 1);

	FOURfromvec(pad0, pass0);
	FOURfromvec(pad1, pass1);
	tmp0 = pass0^SK00;
	tmp1 = pass1^SK00;

	FOURtovec(pass0, ostate0);
	FOURtovec(pass1, ostate1);
	SHA256(&pass0, &pass1, tmp0, tmp1, SK00, SK00, 0);
    FOURfromvec(ostate0, pass0);
    FOURfromvec(ostate1, pass1);

	FOURtovec(pass0, pad0);
	FOURtovec(pass1, pad1);
	tmp0 = pass0^SK01;
	tmp1 = pass1^SK01;
	FOURtovec(pass0, tstate0);
	FOURtovec(pass1, tstate1);
	SHA256(&pass0, &pass1, tmp0, tmp1, SK01, SK01, 0);
	FOURfromvec(tstate0, pass0);
	FOURfromvec(tstate1, pass1);

	tmp0 = pass0;
	tmp1 = pass1;
	SHA256(&pass0, &pass1, input[0],input[1],input[2],input[3], 1);
	FOURfromvec(tstate0, pass0);
	FOURfromvec(tstate1, pass1);

	for (uint i=0; i<4; i++){
		FOURcopy(pad0, tstate0);
		FOURcopy(pad1, tstate1);
		FOURtovec(pass0, ostate0);
		FOURtovec(pass1, ostate1);
		X[(i<<1) ] = pass0;
		X[(i<<1)+1] = pass1;
		FOURtovec(pass0, pad0);
		FOURtovec(pass1, pad1);
		FOURtovec(pass2, data);
		SHA256(&pass0, &pass1, pass2, (uint4)(i+1,SK02,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, SK04), 1);
		FOURfromvec(pad0, pass0);
		FOURfromvec(pad1, pass1);
		SHA256(X+(i<<1),X+(i<<1)+1, pass0, pass1, (uint4)(SK02, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, SK05), 1);
	}

	scrypt_core(X,padcache);

	SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3], 1);
	SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7], 1);

	FOURfromvec(tstate0, tmp0);
	FOURfromvec(tstate1, tmp1);

#define A tstate01
#define B tstate02
#define C tstate03
#define D tstate04
#define E tstate11
#define F tstate12
#define G tstate13
#define H tstate14


//#pragma unroll UNROLL_FACTOR
	for(uint i=0; i<8; i++){
		RND(A,B,C,D,E,F,G,H, fixedWa[i]);
		RND(H,A,B,C,D,E,F,G, fixedWb[i]);
		RND(G,H,A,B,C,D,E,F, fixedWc[i]);
		RND(F,G,H,A,B,C,D,E, fixedWd[i]);
		RND(E,F,G,H,A,B,C,D, fixedWe[i]);
		RND(D,E,F,G,H,A,B,C, fixedWf[i]);
		RND(C,D,E,F,G,H,A,B, fixedWg[i]);
		RND(B,C,D,E,F,G,H,A, fixedWh[i]);
	}


/*
DecAllFIX

RND(A,B,C,D,E,F,G,H, fixedW00);
RND(H,A,B,C,D,E,F,G, fixedW01);
RND(G,H,A,B,C,D,E,F, fixedW02);
RND(F,G,H,A,B,C,D,E, fixedW03);
RND(E,F,G,H,A,B,C,D, fixedW04);
RND(D,E,F,G,H,A,B,C, fixedW05);
RND(C,D,E,F,G,H,A,B, fixedW06);
RND(B,C,D,E,F,G,H,A, fixedW07);

RND(A,B,C,D,E,F,G,H, fixedW08);
RND(H,A,B,C,D,E,F,G, fixedW09);
RND(G,H,A,B,C,D,E,F, fixedW10);
RND(F,G,H,A,B,C,D,E, fixedW11);
RND(E,F,G,H,A,B,C,D, fixedW12);
RND(D,E,F,G,H,A,B,C, fixedW13);
RND(C,D,E,F,G,H,A,B, fixedW14);
RND(B,C,D,E,F,G,H,A, fixedW15);

RND(A,B,C,D,E,F,G,H, fixedW16);
RND(H,A,B,C,D,E,F,G, fixedW17);
RND(G,H,A,B,C,D,E,F, fixedW18);
RND(F,G,H,A,B,C,D,E, fixedW19);
RND(E,F,G,H,A,B,C,D, fixedW20);
RND(D,E,F,G,H,A,B,C, fixedW21);
RND(C,D,E,F,G,H,A,B, fixedW22);
RND(B,C,D,E,F,G,H,A, fixedW23);

RND(A,B,C,D,E,F,G,H, fixedW24);
RND(H,A,B,C,D,E,F,G, fixedW25);
RND(G,H,A,B,C,D,E,F, fixedW26);
RND(F,G,H,A,B,C,D,E, fixedW27);
RND(E,F,G,H,A,B,C,D, fixedW28);
RND(D,E,F,G,H,A,B,C, fixedW29);
RND(C,D,E,F,G,H,A,B, fixedW30);
RND(B,C,D,E,F,G,H,A, fixedW31);

RND(A,B,C,D,E,F,G,H, fixedW32);
RND(H,A,B,C,D,E,F,G, fixedW33);
RND(G,H,A,B,C,D,E,F, fixedW34);
RND(F,G,H,A,B,C,D,E, fixedW35);
RND(E,F,G,H,A,B,C,D, fixedW36);
RND(D,E,F,G,H,A,B,C, fixedW37);
RND(C,D,E,F,G,H,A,B, fixedW38);
RND(B,C,D,E,F,G,H,A, fixedW39);

RND(A,B,C,D,E,F,G,H, fixedW40);
RND(H,A,B,C,D,E,F,G, fixedW41);
RND(G,H,A,B,C,D,E,F, fixedW42);
RND(F,G,H,A,B,C,D,E, fixedW43);
RND(E,F,G,H,A,B,C,D, fixedW44);
RND(D,E,F,G,H,A,B,C, fixedW45);
RND(C,D,E,F,G,H,A,B, fixedW46);
RND(B,C,D,E,F,G,H,A, fixedW47);

RND(A,B,C,D,E,F,G,H, fixedW48);
RND(H,A,B,C,D,E,F,G, fixedW49);
RND(G,H,A,B,C,D,E,F, fixedW50);
RND(F,G,H,A,B,C,D,E, fixedW51);
RND(E,F,G,H,A,B,C,D, fixedW52);
RND(D,E,F,G,H,A,B,C, fixedW53);
RND(C,D,E,F,G,H,A,B, fixedW54);
RND(B,C,D,E,F,G,H,A, fixedW55);

RND(A,B,C,D,E,F,G,H, fixedW56);
RND(H,A,B,C,D,E,F,G, fixedW57);
RND(G,H,A,B,C,D,E,F, fixedW58);
RND(F,G,H,A,B,C,D,E, fixedW59);
RND(E,F,G,H,A,B,C,D, fixedW60);
RND(D,E,F,G,H,A,B,C, fixedW61);
RND(C,D,E,F,G,H,A,B, fixedW62);
RND(B,C,D,E,F,G,H,A, fixedW63);
*/

#undef A
#undef B
#undef C
#undef D
#undef E
#undef F
#undef G
#undef H

	FOURtovec(pass0, tstate0);
	FOURtovec(pass1, tstate1);

	tmp0 += pass0;
	tmp1 += pass1;

	FOURtovec(pass2, ostate0);
	FOURtovec(pass0, ostate1);
	SHA256(&pass2,&pass0, tmp0, tmp1, (uint4)(SK02, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, SK05), 1);

	FOURfromvec(ostate1, pass0);
	bool result = (EndianSwapa(ostate14) <= target);
	if (result)
		SETFOUND(gid);
}
