#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Power-of-two lookup table: N[k] == 2^k for every k in 0..20.
 * Written as explicit shifts so the index/exponent relationship is visible.
 */
const uint32_t N[21] = {
    UINT32_C(1) <<  0,  /* never used, padding */
    UINT32_C(1) <<  1,
    UINT32_C(1) <<  2,
    UINT32_C(1) <<  3,
    UINT32_C(1) <<  4,
    UINT32_C(1) <<  5,
    UINT32_C(1) <<  6,
    UINT32_C(1) <<  7,
    UINT32_C(1) <<  8,
    UINT32_C(1) <<  9,
    UINT32_C(1) << 10,  /* 2^10 == 1024, Litecoin scrypt default */
    UINT32_C(1) << 11,
    UINT32_C(1) << 12,
    UINT32_C(1) << 13,
    UINT32_C(1) << 14,
    UINT32_C(1) << 15,
    UINT32_C(1) << 16,
    UINT32_C(1) << 17,
    UINT32_C(1) << 18,
    UINT32_C(1) << 19,
    UINT32_C(1) << 20
};


/* Write the expected command-line form to standard output. */
void Usage(void){
	fputs("./tctomb Nf Lg Tc\n", stdout);
}

int main(int argc, char **argv){
	uint64_t Tc;
	uint64_t Lg;
	uint64_t Nf;
	uint64_t zsize = 8;
	uint64_t total_size;
	//uint64_t gap_size;
	uint64_t t_size_total;
	uint64_t t_size_chunk;
	//uint64_t num1;
	//double tmp1;

	//uint32_t blocks = 0;
	//uint32_t block_size = 0;
	uint64_t lookup_gap = 0;

	if(argc == 2){
		Nf = 10;
		Lg = 2;
		Tc = 15498;
	}else if(argc == 3){
		Nf = atoi(argv[1]);
		Lg = atoi(argv[2]);
		//Tc = atoi(argv[3]);
	}else if(argc == 4){
		Nf = atoi(argv[1]);
		Lg = atoi(argv[2]);
		Tc = atoi(argv[3]);
	}else{
		Usage();
		exit(1);
	}

	if( (Nf > 21) | (Nf < 1) ){
		printf("nfactor out of range (1-21)\n");
		exit(1);
	}
	//Nf = N[Nf];
	uint32_t ySIZE = ( N[Nf]/Lg + (N[Nf]%Lg>0) );
	

	//x*(z ## SIZE)   //based on id
	//y*(x ## SIZE)*(z ## SIZE)    //based on ysize and total threads
	//z   //read per thread


	//read 128bytes ysize times at offsets

	t_size_chunk = zsize*16;
	t_size_total = ySIZE*zsize*16;
	//block_size = ySIZE*Tc*zsize;
	//t_size_total = Tc*t_size_chunk;
	total_size = (Tc-1)*zsize;
	total_size += ySIZE*Tc*zsize+zsize;
	total_size *= 16;
	lookup_gap = Tc*zsize*16;

	//total_size = ySIZE*Tc*zsize+t_size_total+zsize;
	//t_size_total = t_size_chunk*ySIZE;
	//each thread reads N chunks at an offset from each block
	//blocks = ySIZE;

	//ySIZE*zsize*16

	printf("Nf:\t%lu\tLg:\t%lu\tTc:\t%lu\n", Nf, Lg, Tc);
	printf("\nSalsa + Write(by gaps):\n");
	printf("Thread chunk:\t\t%lu\n", t_size_chunk);
	printf("Lookup gap:\t\t%lu\n", lookup_gap);
	printf("Thread total:\t\t%lu\n", t_size_total);
	printf("Written:\t\t\%lu\n", total_size);
	//tmp1 = t_size_total/t_size_chunk;
	printf("Read Data(rand) + xor + salsa(every other read) + salsa\n");
	printf("Read:\t\t\%lu\n", total_size);
	printf("\nTotal:\t%lu\n", total_size*2);

	return 0;
}