#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define ALGS 8
#define RANGES 4
#define MAX_SIZE 32768

#ifndef UINT32_MAX
#define UINT32_MAX (4294967295U)
#endif

// Print the name of algorithm number `alg` to stdout, preceded by a tab and
// left-justified in a 14-character column so that whatever is printed next
// lines up. An index outside 0..ALGS-1 prints "error" with no tab or padding.
static void Print_Alg(uint16_t alg){
	static const char *const names[] = {
		"byte_loop",
		"loop",
		"unrolled_loop",
		"rep_byte",
		"rep_4byte",
		"rep_8byte",
		"vector_loop",
		"libcall"
	};

	if(alg >= ALGS){
		printf("error");
		return;
	}

	// every name is shorter than 14 characters, so the field width alone
	// produces the trailing spaces
	printf("\t%-14s", names[alg]);
}

// Return the index of the algorithm with the highest count in counts[0..ALGS-1].
// Ties go to the lowest index.
static uint16_t Most_Wins(const uint16_t *counts){
	uint16_t best = 0;
	uint16_t alg;

	for(alg=1; alg<ALGS; alg++){
		if(counts[alg] > counts[best])
			best = alg;
	}
	return best;
}

// Find the 4-range algorithm configuration with the most wins.
//
// Reads one cycle count per line from results/r1.txt .. results/r<ALGS>.txt
// (line N of file K is the measurement of algorithm K-1 for size index N),
// determines the fastest algorithm for every size index, then exhaustively
// searches all ways of splitting [0, test_max) into four consecutive
// non-empty ranges a|b|c|d. Each range is assigned the algorithm that is
// fastest for most of its sizes; the split whose four ranges collect the
// most wins in total is printed.
//
// argv[1] (optional): number of size indices to consider, RANGES..MAX_SIZE.
// Defaults to MAX_SIZE.
//
// Returns 0 on success, EXIT_FAILURE on a bad argument, an unreadable
// results file, or too few samples to form four ranges.
int main(int argc, char **argv){
	// static storage: data alone is 1 MiB, too much to put on the stack
	static uint32_t data[ALGS][MAX_SIZE];
	static uint32_t fastest[MAX_SIZE];

	FILE *in;
	char path[32];
	char line[64];
	size_t samples = MAX_SIZE;	// rows available for every loaded algorithm
	size_t size, rows;
	size_t cur_wins;
	size_t max_wins = 0;
	unsigned alg, range, moved;

	uint16_t test_max = MAX_SIZE;
	uint16_t max_end_d;
	uint16_t start_a, start_b, start_c, start_d;
	// earliest start of each range; the search below assumes exactly 4 ranges
	const uint16_t min_start_a = 0;
	const uint16_t min_start_b = 1;
	const uint16_t min_start_c = 2;
	const uint16_t min_start_d = 3;

	uint16_t winning_starts[RANGES] = {0};	// [0] stays 0: range a always starts at 0
	uint16_t winning_algs[RANGES] = {0};
	uint16_t winners[RANGES] = {0};
	uint16_t wins[RANGES-1][ALGS];	// per-range counts at the range's widest extent
	uint16_t wins_T[RANGES][ALGS];	// working counts for the split being evaluated
	uint16_t wins_B[ALGS];		// range b counts for its minimal extent

	if(argc > 1){
		char *end = NULL;
		unsigned long requested = strtoul(argv[1], &end, 10);

		// a value above MAX_SIZE would index past the end of fastest[]
		if(end == argv[1] || *end != '\0' || requested > MAX_SIZE){
			fprintf(stderr, "usage: %s [max_size (%d..%d)]\n", argv[0], RANGES, MAX_SIZE);
			return EXIT_FAILURE;
		}
		test_max = (uint16_t)requested;
	}

	// Mark every sample as "missing": UINT32_MAX can never be strictly
	// faster than a real measurement, so unloaded entries never win.
	memset(data, 0xFF, sizeof data);

	for(alg=0; alg<ALGS; alg++){
#ifdef IS32BIT
		if(alg == 5)
			continue;
#endif
		snprintf(path, sizeof path, "results/r%u.txt", alg+1);
		in = fopen(path, "r");
		if(in == NULL){
			perror(path);
			return EXIT_FAILURE;
		}
		rows = 0;
		while(rows < MAX_SIZE && fgets(line, sizeof line, in) != NULL){
			unsigned long cycles = strtoul(line, NULL, 10);

			data[alg][rows] = (cycles > UINT32_MAX) ? UINT32_MAX : (uint32_t)cycles;
			rows++;
		}
		fclose(in);
		if(rows < samples)
			samples = rows;
	}

	// never evaluate sizes for which some algorithm has no measurement
	if(test_max > samples){
		fprintf(stderr, "warning: only %zu samples available, requested %u\n",
			samples, (unsigned)test_max);
		test_max = (uint16_t)samples;
	}
	if(test_max < RANGES){
		fprintf(stderr, "error: need at least %d sizes to form %d ranges\n", RANGES, RANGES);
		return EXIT_FAILURE;
	}
	max_end_d = test_max;
	printf("Testing to %u\n", (unsigned)test_max);

	// record which algorithm is fastest for each size (lowest index on ties)
	for(size=0; size<test_max; size++){
		uint16_t best = 0;

		for(alg=1; alg<ALGS; alg++){
			if(data[alg][size] < data[best][size])
				best = (uint16_t)alg;
		}
		fastest[size] = best;
	}

	// Initial win counts: range d covers [min_start_d, max_end_d), the
	// other ranges cover only the sizes up to the next range's minimum start.
	memset(wins, 0, sizeof wins);
	memset(wins_T, 0, sizeof wins_T);
	for(start_d = min_start_d; start_d<max_end_d; start_d++)
		wins_T[3][fastest[start_d]]++;
	for(start_c = min_start_c; start_c<min_start_d; start_c++)
		wins[2][fastest[start_c]]++;
	for(start_b = min_start_b; start_b<min_start_c; start_b++)
		wins[1][fastest[start_b]]++;
	for(start_a = min_start_a; start_a<min_start_b; start_a++)
		wins[0][fastest[start_a]]++;

	memcpy(wins_B, wins[1], sizeof wins_B);

	// Invariants while searching:
	//   wins_T[3] counts [start_d, max_end_d)   wins[2] counts [min_start_c, start_d)
	//   wins_T[2] counts [start_c, start_d)     wins[1] counts [min_start_b, start_c)
	//   wins_T[1] counts [start_b, start_c)     wins[0] counts [0, min_start_b)
	//   wins_T[0] counts [0, start_b)
	for(start_d = min_start_d; start_d<max_end_d; start_d++){
		memcpy(wins_T[2], wins[2], sizeof wins_T[2]);
		memcpy(wins[1], wins_B, sizeof wins[1]);
		// range d does not change inside the inner loops, pick its winner once
		winners[3] = Most_Wins(wins_T[3]);

		for(start_c = min_start_c; start_c<start_d; start_c++){
			memcpy(wins_T[0], wins[0], sizeof wins_T[0]);
			memcpy(wins_T[1], wins[1], sizeof wins_T[1]);
			winners[2] = Most_Wins(wins_T[2]);

			for(start_b = min_start_b; start_b<start_c; start_b++){
				winners[0] = Most_Wins(wins_T[0]);
				winners[1] = Most_Wins(wins_T[1]);

				cur_wins = 0;
				for(range=0; range<RANGES; range++)
					cur_wins += wins_T[range][winners[range]];

				if(cur_wins > max_wins){
					winning_starts[1] = start_b;
					winning_starts[2] = start_c;
					winning_starts[3] = start_d;
					memcpy(winning_algs, winners, sizeof winning_algs);
					max_wins = cur_wins;
				}

				// advancing start_b moves this size from range b to range a
				moved = fastest[start_b];
				wins_T[1][moved]--;
				wins_T[0][moved]++;
			}
			// advancing start_c moves this size from range c to range b
			moved = fastest[start_c];
			wins_T[2][moved]--;
			wins[1][moved]++;
		}
		// advancing start_d moves this size from range d to range c
		moved = fastest[start_d];
		wins_T[3][moved]--;
		wins[2][moved]++;
	}

	printf("config with most wins in first %u:\n", (unsigned)test_max);
	printf("\twinning_alg\t\tstart\n");
	for(range=0; range<RANGES; range++){
		Print_Alg(winning_algs[range]);
		printf("(%u)\t\t%u\n", (unsigned)winning_algs[range], (unsigned)winning_starts[range]);
	}

	return 0;
}
