#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/time.h>

// NOT designed for arm
// NOT designed for 32-bit
// lsb x86_64 linux only

/*
 * LRAND(s) - advance the generator state `s` in place and yield the new state.
 *
 * `s` must be a modifiable lvalue: it is assigned s * 41943011 - 2147483647
 * and that assigned value is the value of the whole expression.  The argument
 * is evaluated twice, so it must not have side effects of its own.
 */
#define LRAND(s) ((s) = (s) * 41943011 - 2147483647)

// Wall-clock sample filled by gettimeofday() at the top of main();
// its tv_usec field (plus 3) becomes the initial LRAND seed.
struct timeval cur;


/*
 * Setup_Data - store the buffer size in the first word of the control array.
 *
 * data: array of 32-bit words; must have at least one writable element.
 * size: value written to data[0].
 *
 * Only data[0] is written here.  The rest of the layout below is the
 * author's plan and is not filled in by this function:
 *
 *   data[0]      = size
 *   data[1]      = loops
 *   data[2..10]  = 8 different offsets
 *   data[11..27] = 8 high/high pairs (start/end)
 *   data[28..44] = 8 low/low pairs   (start/end)
 *
 * NOTE(review): the ranges above span 9 and 17 slots rather than 8 and 16,
 * and main() reads its pairs starting at indices 10 and 26 -- confirm the
 * intended indices before relying on this table.
 */
void Setup_Data(uint32_t *data, uint32_t size){
	*data = size;
}


/*
 * Reader is neither declared nor defined in this file; it has to be supplied
 * at link time.  It is declared here without a prototype and with an int
 * return type, which is exactly what a call to an undeclared function
 * assumes, so the call below is passed the same way as before.
 * TODO: replace with the real prototype once Reader's signature is confirmed.
 */
int Reader();

/*
 * Benchmark driver.
 *
 * Allocates a 1048576-word (4 MiB) buffer, zeroes it, fills the header words
 * (data[0] = word count, data[1] = loop count, data[2..10] = pseudo-random
 * values in 0..63), prints the header, hands the buffer to Reader, and then
 * prints eight 64-bit differences built from the word pairs Reader is
 * expected to leave in data[10..25] and data[26..41].
 *
 * Returns 0 on success, EXIT_FAILURE if the buffer cannot be allocated.
 */
int main(int argc, char **argv){
	enum {
		BUF_WORDS    = 1048576,	/* buffer length in uint32_t words (4 MiB) */
		LOOPS        = 8096,
		OFFSET_FIRST = 2,	/* data[2..10] receive LRAND() % 64 */
		OFFSET_LAST  = 10,
		HEADER_WORDS = 11,	/* data[0..10] are printed before the run */
		LOW_FIRST    = 10,	/* first start/end pair: data[10], data[11] */
		HIGH_FIRST   = 26,	/* matching pair:        data[26], data[27] */
		PAIR_COUNT   = 8
	};
	uint32_t *data;
	uint64_t seed;
	uint32_t low_idx;
	uint32_t high_idx;
	uint32_t i;
	size_t word;

	(void)argc;
	(void)argv;

	gettimeofday(&cur, NULL);
	seed = (uint64_t)cur.tv_usec + 3;

	data = malloc(sizeof *data * BUF_WORDS);
	if(data == NULL){
		perror("malloc");
		return EXIT_FAILURE;
	}

	/* Zero every word so that no part of the buffer handed to Reader is
	   left with indeterminate contents. */
	for(word = 0; word < BUF_WORDS; word++){
		data[word] = 0;
	}

	data[0] = BUF_WORDS;
	data[1] = LOOPS;
	for(i = OFFSET_FIRST; i <= OFFSET_LAST; i++){
		data[i] = LRAND(seed) % 64;
	}

	for(i = 0; i < HEADER_WORDS; i++){
		printf("%u: %u\n", i, data[i]);
	}
	printf("here 1\n");

	/* Second argument equals the buffer size in bytes (BUF_WORDS * 4). */
	Reader(data, BUF_WORDS * 4, LOOPS);
	printf("here 2\n");

	printf("here 3\n");

	/*
	 * Pair layout read back below ("a" rows / "b" rows):
	 *   a: 10 11, 12 13, ... 24 25
	 *   b: 26 27, 28 29, ... 40 41
	 * Sample output from an earlier run:
	 *   a 2943284818 0
	 *   b 294163 0
	 * NOTE(review): the "a" words are treated as the low 32 bits and the "b"
	 * words as the high 32 bits of each 64-bit counter; this is inferred
	 * from the sample magnitudes above -- confirm against Reader.
	 */
	high_idx = HIGH_FIRST;
	for(low_idx = LOW_FIRST; low_idx < LOW_FIRST + 2 * PAIR_COUNT; low_idx += 2){
		uint64_t cycles_s;
		uint64_t cycles_e;

		printf("a %u %u\n", data[low_idx], data[low_idx+1]);
		printf("b %u %u\n", data[high_idx], data[high_idx+1]);

		/* The high word has to be shifted LEFT by 32 after widening;
		   shifting a widened 32-bit value right by 32 always yields 0
		   and would drop the word entirely. */
		cycles_s = (uint64_t)data[low_idx]   | ((uint64_t)data[high_idx]   << 32);
		cycles_e = (uint64_t)data[low_idx+1] | ((uint64_t)data[high_idx+1] << 32);

		printf("%llu\n", (unsigned long long)(cycles_e - cycles_s));
		high_idx += 2;
	}

	free(data);
	return 0;
}