#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>

/*
 * ALGS: number of algorithm columns kept per size. Builds that define
 * IS32BIT use 7 (their algorithm name list has no "rep_8byte" entry);
 * all other builds use 8.
 */
#ifdef IS32BIT
#define ALGS 7
#else
#define ALGS 8
#endif

/* RANGES: number of consecutive size ranges the search splits the sizes into. */
#define RANGES 4

/*
 * MAX_SIZE: number of sizes (table rows) that are loaded and searched.
 * Defaults to 1024 and can be overridden at build time (e.g. -DMAX_SIZE=...).
 * It is stored in 16-bit counters, so it has to fit in a uint16_t.
 */
#ifndef MAX_SIZE
#define MAX_SIZE 1024
#endif

/*
uint8_t ties[MAX_SIZE];

void SET_ONE(const uint16_t pos, const uint8_t bit){
        uint8_t cur = ties[pos];
        uint8_t b0 = (bit == 0) ? 1   : (((cur<<7) & 128) >= 128);
        uint8_t b1 = (bit == 1) ? 2   : (((cur<<6) & 128) >= 128)<<1;
        uint8_t b2 = (bit == 2) ? 4   : (((cur<<5) & 128) >= 128)<<2;
        uint8_t b3 = (bit == 3) ? 8   : (((cur<<4) & 128) >= 128)<<3;
        uint8_t b4 = (bit == 4) ? 16  : (((cur<<3) & 128) >= 128)<<4;
        uint8_t b5 = (bit == 5) ? 32  : (((cur<<2) & 128) >= 128)<<5;
        uint8_t b6 = (bit == 6) ? 64  : (((cur<<1) & 128) >= 128)<<6;
        uint8_t b7 = (bit == 7) ? 128 : (((cur) & 128)  >= 128)<<7;

        ties[pos] = b0+b1+b2+b3+b4+b5+b6+b7;
}

uint8_t IS_SET(const uint16_t pos, const uint8_t bit){
        uint8_t cur = ties[pos];
        cur <<= (7-bit);
        cur &= 128;

        if(cur >= 128)
                return 1;
        else
                return 0;
}
*/

/*
void Increment_Winners(uint16_t start, const uint16_t end, uint16_t *output){
	uint8_t a;
	while(start<end){
		for(a=0; a<ALGS; a++){
			if(IS_SET(start, a))
				output[a]++;
		}
		start++;
	}
}
*/

/*
 * Write `alg` to stdout, then pad with spaces until at least `length`
 * characters have been written in total.
 *
 * alg:    NUL-terminated string to print (must not be NULL).
 * length: minimum field width; a string of `length` or more characters is
 *         printed in full and gets no padding.
 */
void Print_with_spaces(const char *alg, const uint16_t length){
	/* Keep the full size_t: narrowing strlen() to 16 bits would wrap for very
	   long strings and make them look short enough to need padding. */
	size_t written = strlen(alg);

	fputs(alg, stdout);
	for(; written < length; written++)
		putchar(' ');
}

/* Free the first `count` rows of `table`, then the row-pointer array itself. */
static void free_table(float **table, size_t count){
	size_t row;

	if(table == NULL)
		return;
	for(row=0; row<count; row++)
		free(table[row]);
	free(table);
}

/*
 * Load one timing file per algorithm (results/r1.txt .. results/r8.txt, one
 * value per line, line N belonging to size N), convert every value to
 * bytes/cycle and then brute-force the three split points that divide the
 * sizes [0, MAX_SIZE) into RANGES consecutive ranges a, b, c, d so that the
 * sum over all ranges of "best single algorithm for that range" is maximal.
 *
 * Prints the per-algorithm average over all sizes, then the best split with
 * the winning algorithm(s) of each range.
 *
 * The min_start_* constants double as row indices into wins/wins_T/winners
 * (range a = 0, b = 1, c = 2, d = 3), which only works while they keep the
 * values 0..3.
 *
 * argc/argv are unused.
 * Returns 0 on success, EXIT_FAILURE if memory cannot be allocated or a
 * results file cannot be opened.
 */
int main(int argc, char **argv){
	FILE *in;
	uint16_t a,b,c=0;
	uint16_t r,pos,range_end,range_len;
	float fa,fb;
	float cur_bpc = 0;
	float max_bpc = 0;
	float config_total = 0;

	uint16_t winning_starts[RANGES] = {0,0,0,0};
	float **data;
	char path[32];
	char line[64];
	uint16_t start_b = 0;
	uint16_t start_c = 0;
	uint16_t start_d = 0;
	const uint16_t result_files = 8;
	const uint16_t min_start_a = 0;
	const uint16_t min_start_b = 1;
	const uint16_t min_start_c = 2;
	const uint16_t min_start_d = 3;
	const uint16_t max_end_d = MAX_SIZE;
	uint16_t winners[RANGES] = {0,0,0,0};
	float wins[RANGES-1][ALGS] = {{0}};   /* running totals kept between passes */
	float wins_T[RANGES][ALGS] = {{0}};   /* working totals for the current split */
	float wins_B[ALGS];                   /* range b seed, later reused as scratch */

#ifdef IS32BIT
	const char *algs[] = {"byte_loop", "loop", "unrolled_loop", "rep_byte", "rep_4byte", "vector_loop", "libcall"};
#else
	const char *algs[] = {"byte_loop", "loop", "unrolled_loop", "rep_byte", "rep_4byte", "rep_8byte", "vector_loop", "libcall"};
#endif

	(void)argc;
	(void)argv;

	printf("Testing to %u\n", (unsigned)max_end_d);

	/* Zero-filled table: a results file with fewer than max_end_d lines then
	   contributes 0 bytes/cycle for the missing sizes instead of garbage. */
	data = calloc(max_end_d, sizeof *data);
	if(data == NULL){
		perror("calloc");
		return EXIT_FAILURE;
	}
	for(a=0; a<max_end_d; a++){
		data[a] = calloc(ALGS, sizeof **data);
		if(data[a] == NULL){
			perror("calloc");
			free_table(data, a);
			return EXIT_FAILURE;
		}
	}

	/* One file per algorithm; c is the column the current file is stored in. */
	for(a=0; a<result_files; a++){
#ifdef IS32BIT
		/* r6.txt is skipped: the 32-bit algorithm list has no column for it */
		if(a == 5)
			continue;
#endif
		(void)snprintf(path, sizeof path, "results/r%u.txt", (unsigned)(a+1));
		in = fopen(path, "r");
		if(in == NULL){
			perror(path);
			free_table(data, max_end_d);
			return EXIT_FAILURE;
		}
		b = 0;
		while(b < max_end_d && fgets(line, sizeof line, in) != NULL){
			/* convert to bytes/cycle: line b holds the measurement for size b+1
			   (presumably a cycle count -- confirm against the benchmark) */
			data[b][c] = (float)((float)(b+1)/atof(line));
			b++;
		}
		fclose(in);
		c++;
	}

	/* Seed the totals with the smallest legal ranges:
	   a=[min_start_a,min_start_b)  b=[min_start_b,min_start_c)
	   c=[min_start_c,min_start_d)  d=[min_start_d,max_end_d) */
	for(a=min_start_a; a<min_start_b; a++){
		for(b=0; b<ALGS; b++)
			wins[min_start_a][b] += data[a][b];
	}
	for(a=min_start_b; a<min_start_c; a++){
		for(b=0; b<ALGS; b++)
			wins[min_start_b][b] += data[a][b];
	}
	for(a=min_start_c; a<min_start_d; a++){
		for(b=0; b<ALGS; b++)
			wins[min_start_c][b] += data[a][b];
	}
	for(a=min_start_d; a<max_end_d; a++){
		for(b=0; b<ALGS; b++)
			wins_T[min_start_d][b] += data[a][b];
	}

	/* wins_B is copied back into wins[min_start_b] at the top of every start_d
	   pass, so it has to hold range b's seed (not range a's). */
	(void)memcpy(wins_B, wins[min_start_b], sizeof wins_B);

	/* range d = [start_d, max_end_d) */
	for(start_d = min_start_d; start_d<max_end_d; start_d++){
		/* range c restarts as [min_start_c, start_d) */
		(void)memcpy(wins_T[min_start_c], wins[min_start_c], sizeof wins_B);

		/* best algorithm for range d; falls back to column 0 if no total is > 0 */
		fa = 0;
		a = 0;
		for(b=0; b<ALGS; b++){
			if(wins_T[min_start_d][b] > fa){
				fa = wins_T[min_start_d][b];
				a = b;
			}
		}
		winners[min_start_d] = a;

		/* range b restarts at its seed extent */
		(void)memcpy(wins[min_start_b], wins_B, sizeof wins_B);

		/* range c = [start_c, start_d) */
		for(start_c = min_start_c; start_c<start_d; start_c++){
			/* ranges a and b restart as [min_start_a,min_start_b) and
			   [min_start_b,start_c) */
			(void)memcpy(wins_T[min_start_a], wins[min_start_a], sizeof wins_B);
			(void)memcpy(wins_T[min_start_b], wins[min_start_b], sizeof wins_B);

			/* best algorithm for range c */
			fa = 0;
			a = 0;
			for(b=0; b<ALGS; b++){
				if(wins_T[min_start_c][b] > fa){
					fa = wins_T[min_start_c][b];
					a = b;
				}
			}
			winners[min_start_c] = a;

			/* range a = [min_start_a, start_b), range b = [start_b, start_c) */
			for(start_b = min_start_b; start_b<start_c; start_b++){
				/* best algorithms for ranges a (first maximum wins) and
				   b (last maximum wins) */
				fa = 0;
				fb = 0;
				a = 0;
				for(b=0; b<ALGS; b++){
					if(wins_T[min_start_a][b] > fa){
						a = b;
						fa = wins_T[min_start_a][b];
					}
					if(wins_T[min_start_b][b] < fb)
						continue;
					winners[min_start_b] = b;
					fb = wins_T[min_start_b][b];
				}
				winners[min_start_a] = a;

				/* score of this split: average bytes/cycle over all sizes */
				cur_bpc = 0;
				for(r=0; r<RANGES; r++)
					cur_bpc += wins_T[r][winners[r]];
				cur_bpc /= MAX_SIZE;

				/* move size start_b from range b to range a for the next pass;
				   this must happen even when the split is not a new best */
				for(b=0; b<ALGS; b++){
					wins_T[min_start_b][b] -= data[start_b][b];
					wins_T[min_start_a][b] += data[start_b][b];
				}

				if(cur_bpc < max_bpc)
					continue;
				winning_starts[min_start_b] = start_b;
				winning_starts[min_start_c] = start_c;
				winning_starts[min_start_d] = start_d;
				max_bpc = cur_bpc;
			}
			/* move size start_c from range c to range b */
			for(b=0; b<ALGS; b++){
				wins_T[min_start_c][b] -= data[start_c][b];
				wins[min_start_b][b] += data[start_c][b];
			}
		}
		/* move size start_d from range d to range c */
		for(b=0; b<ALGS; b++){
			wins_T[min_start_d][b] -= data[start_d][b];
			wins[min_start_c][b] += data[start_d][b];
		}
	}

	/* Report 1: every algorithm averaged over all sizes. */
	(void)memset(wins_B, 0, sizeof wins_B);
	printf("\nAvg bytes/cycles for all algs at each byte for all data\n");
	for(a=min_start_a; a<max_end_d; a++){
		for(b=0; b<ALGS; b++)
			wins_B[b] += data[a][b];
	}
	for(a=0; a<ALGS; a++){
		printf("\t");
		Print_with_spaces(algs[a], 20);
		printf("%.04f\n", wins_B[a]/MAX_SIZE);
	}
	printf("\n");

	/* Report 2: the best split, recomputed from the table range by range. */
	printf("Config with best avg bytes/cycles:\n");
	printf("\tstart\tavg b/c\talg/s\n");
	for(r=0; r<RANGES; r++){
		/* a range ends where the next one starts; the last ends at max_end_d */
		if(r+1 < RANGES)
			range_end = winning_starts[r+1];
		else
			range_end = max_end_d;

		(void)memset(wins_B, 0, sizeof wins_B);
		for(pos=winning_starts[r]; pos<range_end; pos++){
			for(b=0; b<ALGS; b++)
				wins_B[b] += data[pos][b];
		}

		cur_bpc = 0;
		for(b=0; b<ALGS; b++){
			if(wins_B[b] > cur_bpc)
				cur_bpc = wins_B[b];
		}

		range_len = range_end - winning_starts[r];
		printf("\t%u\t%.04f\t", (unsigned)winning_starts[r], cur_bpc/range_len);
		/* accumulate in its own variable so the search's best score does not
		   leak into the printed total */
		config_total += cur_bpc;
		/* list every algorithm tied for the best total in this range */
		for(b=0; b<ALGS; b++){
			if(wins_B[b] == cur_bpc)
				printf("%s(%u) ", algs[b], (unsigned)b);
		}
		printf("\n");
	}
	printf("\nconfig avg: %.04f\n", config_total/MAX_SIZE);

	free_table(data, max_end_d);

	return 0;
}
